diff options
Diffstat (limited to 'src/runtime/gpu/cl')
66 files changed, 0 insertions, 8195 deletions
diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h deleted file mode 100644 index 049bf05dc1..0000000000 --- a/src/runtime/gpu/cl/IClOperator.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICL_OPERATOR_H -#define ARM_COMPUTE_ICL_OPERATOR_H - -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/runtime/CL/ICLOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -using IClOperator = experimental::ICLOperator; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_ICL_OPERATOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClActivation.cpp b/src/runtime/gpu/cl/operators/ClActivation.cpp deleted file mode 100644 index 34a2f94fdc..0000000000 --- a/src/runtime/gpu/cl/operators/ClActivation.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClActivation.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClActivationKernel.h" - -#include "src/common/IOperator.h" -#include "src/common/utils/LegacySupport.h" -#include "src/gpu/cl/ClContext.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClActivation::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClActivationKernel>(); - k->configure(compile_context, src, dst, act_info); - _kernel = std::move(k); -} - -Status ClActivation::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClActivationKernel::validate(src, dst, act_info); -} -} // namespace opencl - -namespace gpu -{ -namespace opencl -{ -std::tuple<IOperator *, StatusCode> ClContext::create_activation(const AclTensorDescriptor &src, const AclTensorDescriptor &dst, const AclActivationDescriptor &act, bool is_validate) -{ - TensorInfo src_info = detail::convert_to_legacy_tensor_info(src); - TensorInfo dst_info = detail::convert_to_legacy_tensor_info(dst); - auto info = detail::convert_to_activation_info(act); - - if(is_validate && !bool(arm_compute::opencl::ClActivation::validate(&src_info.set_is_resizable(false), &dst_info.set_is_resizable(false), info))) - { - return std::make_tuple(nullptr, StatusCode::UnsupportedConfig); - } - - auto act_op = std::make_unique<arm_compute::opencl::ClActivation>(); - act_op->configure(CLKernelLibrary::get().get_compile_context(), &src_info, &dst_info, info); - - auto op = new arm_compute::IOperator(static_cast<IContext *>(this)); - if(op == nullptr) - { - ARM_COMPUTE_LOG_ERROR_ACL("Couldn't allocate internal resources"); - return std::make_tuple(nullptr, StatusCode::OutOfMemory); - } - op->set_internal_operator(std::move(act_op)); - - return std::make_tuple(op, StatusCode::Success); -} -} // namespace opencl -} // namespace gpu -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClActivation.h b/src/runtime/gpu/cl/operators/ClActivation.h deleted file mode 100644 index 82ef8ac63a..0000000000 --- a/src/runtime/gpu/cl/operators/ClActivation.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ACTIVATION_H -#define ARM_COMPUTE_CL_ACTIVATION_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClActivationKernel */ -class ClActivation : public IClOperator -{ -public: - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] dst Destination tensor info. Data type supported: same as @p src - * @param[in] activation_info Activation layer parameters. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &activation_info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClActivation::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ACTIVATION_H */ diff --git a/src/runtime/gpu/cl/operators/ClAdd.cpp b/src/runtime/gpu/cl/operators/ClAdd.cpp deleted file mode 100644 index 01f550f819..0000000000 --- a/src/runtime/gpu/cl/operators/ClAdd.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClAdd.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClAdd::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::ADD, src1, src2, dst, policy, act_info); - _kernel = std::move(k); -} - -Status ClAdd::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::ADD, src1, src2, dst, policy, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClAdd.h b/src/runtime/gpu/cl/operators/ClAdd.h deleted file mode 100644 index 7b84a767d6..0000000000 --- a/src/runtime/gpu/cl/operators/ClAdd.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ADD_H -#define ARM_COMPUTE_CL_ADD_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run arithmetic addition - * - * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @note The function performs an arithmetic addition between two tensors. - */ -class ClAdd : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClAdd::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ADD_H */ diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp deleted file mode 100644 index 3f54004aa7..0000000000 --- a/src/runtime/gpu/cl/operators/ClCast.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClCast.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCastKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy) -{ - auto k = std::make_unique<kernels::ClCastKernel>(); - k->configure(compile_context, src, dst, policy); - _kernel = std::move(k); -} - -Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy) -{ - return kernels::ClCastKernel::validate(src, dst, policy); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h deleted file mode 100644 index 107eb2bfe9..0000000000 --- a/src/runtime/gpu/cl/operators/ClCast.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_CAST_H -#define ARM_COMPUTE_CL_CAST_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCastKernel */ -class ClCast : public IClOperator -{ -public: - /** Configure operator for a given list of arguments - * - * @note Input data type must be different than output data type. - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src |dst | - * |:--------------|:--------------------------------------| - * |U8 | S8, U16, S16, U32, S32, F16, F32 | - * |U16 | U8, S8, S16, U32, S32, F16, F32 | - * |S16 | U8, S8, U16, U32, S32, F16, F32 | - * |U32 | U8, S8, U16, S16, S32, F16, F32 | - * |S32 | U8, S8, U16, S16, U32, F16, F32 | - * |F16 | U8, S8, U16, S16, U32, F32 | - * |F32 | U8, S8, U16, S16, U32, F16 | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[out] dst The destinatio tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClCast::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CAST_H */ diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp deleted file mode 100644 index d3c05eae78..0000000000 --- a/src/runtime/gpu/cl/operators/ClConcatenate.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClConcatenate.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/helpers/AutoConfiguration.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(dst == nullptr); - _axis = axis; - _num_inputs = src_vector.size(); - - TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis); - std::vector<const ITensorInfo *> const_src_vector(src_vector.size()); - std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // dst auto inizialitation if not yet initialized - auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>(); - kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>(); - kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr); - const unsigned int num_inputs = src_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - case 3: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(dst->total_size() != 0) - { - TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size()); - } - - return Status{}; -} - -void ClConcatenate::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h deleted file mode 100644 index 153400bd73..0000000000 --- a/src/runtime/gpu/cl/operators/ClConcatenate.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONCATENATE_H -#define ARM_COMPUTE_CLCONCATENATE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <vector> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). - * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). - * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). - * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). - */ -class ClConcatenate : public IClOperator -{ -public: - ClConcatenate() = default; - /** Initialise the kernel's inputs vector and dst. - * - * @note Input and dst tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, - * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. - * - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All - * @param[out] dst Destination tensor info. Data types supported: same as @p src_vector. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. - */ - void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClConcatenate::configure() - * - * @return a status - */ - static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::vector<std::unique_ptr<IClKernel>> _concat_kernels{}; - unsigned int _num_inputs{ 0 }; - unsigned int _axis{ 0 }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CONCATENATE_H */ diff --git a/src/runtime/gpu/cl/operators/ClConv2d.cpp b/src/runtime/gpu/cl/operators/ClConv2d.cpp deleted file mode 100644 index 0cb3a968e6..0000000000 --- a/src/runtime/gpu/cl/operators/ClConv2d.cpp +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClConv2d.h" - -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" -#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" -#include "src/runtime/gpu/cl/operators/ClGemmConv2d.h" -#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h" - -#include <memory> - -namespace -{ -/** Get the suitable kernel size for using direct convolution method with NHWC data layout. - * - * @note Direct convolution should be executed when the kernel has the spatial dimensions greater than or equal to the value returned by this function - * - * @param[in] gpu_target GPU target - * - * @return the suitable kernel size for using direct convolution method with NHWC data layout - */ -size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target) -{ - switch(gpu_target) - { - case arm_compute::GPUTarget::G76: - case arm_compute::GPUTarget::G77: - case arm_compute::GPUTarget::G78: - return 5; - case arm_compute::GPUTarget::G71: - case arm_compute::GPUTarget::G72: - case arm_compute::GPUTarget::MIDGARD: - case arm_compute::GPUTarget::BIFROST: - return 7; - default: - return 5; - } -} -} // namespace - -namespace arm_compute -{ -namespace opencl -{ -using namespace arm_compute::misc::shape_calculator; - -ClConv2d::ClConv2d() - : _operator() -{ -} - -ClConv2d::~ClConv2d() = default; - -void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); - ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, ((biases != nullptr) ? biases : nullptr), dst, conv2d_info, weights_info)); - - switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target())) - { - case ConvolutionMethod::WINOGRAD: - { - ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); - auto f = std::make_unique<ClWinogradConv2d>(); - f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math); - _operator = std::move(f); - break; - } - case ConvolutionMethod::DIRECT: - { - ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); - auto f = std::make_unique<ClDirectConv2d>(); - f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); - _operator = std::move(f); - break; - } - case ConvolutionMethod::GEMM: - { - auto f = std::make_unique<ClGemmConv2d>(); - f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info); - _operator = std::move(f); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported."); - break; - } - _aux_mem = _operator->workspace(); -} - -Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); - - const GPUTarget gpu_target = CLScheduler::get().target(); - - switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target)) - { - case ConvolutionMethod::WINOGRAD: - { - //Validate Winograd - ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported"); - ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math)); - break; - } - case ConvolutionMethod::DIRECT: - { - // Validate direct convolution layer - ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported"); - ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); - break; - } - case ConvolutionMethod::GEMM: - { - // Validate gemm-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info)); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported."); - break; - } - - return Status{}; -} - -ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_ERROR_ON_NULLPTR(dst); - ARM_COMPUTE_ERROR_ON_NULLPTR(weights); - ARM_COMPUTE_UNUSED(weights_info); - - const PadStrideInfo conv_info = conv2d_info.conv_info; - const ActivationLayerInfo act_info = conv2d_info.act_info; - const Size2D dilation = conv2d_info.dilation; - bool enable_fast_math = conv2d_info.enable_fast_math; - - const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL); - - /* Input spatial dims, kernel size, IFM/OFM, conv info*/ - using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>; - using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>; - - const std::vector<ConfigurationMethod> known_configs = - { - // Alexnet - ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT), - // VGG16 / VGG19 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT), - // Mobilenet 224 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), - // Mobilenet 160 - ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), - // Mobilenet 224 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), - // Mobilenet 160 - ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), - }; - - const auto find_config = [&](ConfigurationMethod c) - { - const ConvolutionConfiguration config = c.first; - const PadStrideInfo info = std::get<3>(config); - const DataLayout data_layout = std::get<4>(config); - - return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) - && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() - && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout()); - }; - - std::vector<ConfigurationMethod>::const_iterator found; - if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) - { - return (*found).second; - } - - if(dilation != Size2D(1U, 1U)) - { - return ConvolutionMethod::GEMM; - } - else - { - if(src->data_layout() == DataLayout::NCHW) - { - // SRGAN - if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) - && (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info))) - { - return ConvolutionMethod::DIRECT; - } - if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math))) - { - return ConvolutionMethod::FFT; - } - if(src->dimension(idx_c) < 16) - { - return ConvolutionMethod::GEMM; - } - return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; - } - else - { - const bool is_direct_valid = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)); - const bool is_wino_valid = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)); - const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target); - - // SRGAN case - if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) - && is_direct_valid) - { - return ConvolutionMethod::DIRECT; - } - - // Floating-point case: GeMM/Direct/Winograd - if(is_data_type_float(src->data_type())) - { - const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr); - const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16; - const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U); - - // Run Winograd if valid and IFM >= 16 - if(is_wino_valid && is_ifm_ge_16) - { - return ConvolutionMethod::WINOGRAD; - } - // Run Direct for Large kernel size - if(is_large_kernel_sz && is_ifm_ge_16 && is_direct_valid && is_ifm_gt_ofm) - { - return ConvolutionMethod::DIRECT; - } - - // Default case - return ConvolutionMethod::GEMM; - } - - // Generic case for quantized. Only GeMM - return ConvolutionMethod::GEMM; - } - } -} - -void ClConv2d::run(ITensorPack &tensors) -{ - prepare(tensors); - _operator->run(tensors); -} - -void ClConv2d::prepare(ITensorPack &tensors) -{ - _operator->prepare(tensors); -} - -experimental::MemoryRequirements ClConv2d::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConv2d.h b/src/runtime/gpu/cl/operators/ClConv2d.h deleted file mode 100644 index cdf3b7df32..0000000000 --- a/src/runtime/gpu/cl/operators/ClConv2d.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONV2D_H -#define ARM_COMPUTE_CLCONV2D_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/FunctionDescriptors.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions: - * - * -# @ref opencl::ClGemmConv2d - * -# @ref opencl::ClWinogradConv2d - * -# @ref opencl::ClDirectConv2d - * -# @ref CLFFTConvolutionLayer - * - * The function selects one of the algorithms mentioned above based on: - * - The size of the kernel - * - Number of src/dst feature maps - * - Amount of memory needed - * - * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed. - * - * FP32 Algorithm| Filter Size | Input/Output feature maps | - * --------------|-------------------------------------------------------------|-------------------------------------------| - * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Input channels is greater than 3 | - * FFT | Squared kernels and greater than 9x9 | Input feature maps > Output feature maps | - * DirectConv | 9x9 | | - * GEMM | Any size | | - * - * Winograd 5x5 requires fast maths enabled. - * - * FP16 Algorithm| Filter Size | Input/Output feature maps | - * --------------|----------------------------|-------------------------------------------| - * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5 | Input channels is greater than 3 | - * FFT | Not supported | | - * DirectConv | 9x9 | | - * GEMM | Any size | | - * - * Winograd FP16 requires fast maths enabled. - * - */ -class ClConv2d : public IClOperator -{ -public: - /** Default constructor */ - ClConv2d(); - /** Default Destructor */ - ~ClConv2d(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClConv2d(const ClConv2d &) = delete; - /** Default move constructor */ - ClConv2d(ClConv2d &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClConv2d &operator=(const ClConv2d &) = delete; - /** Default move assignment operator */ - ClConv2d &operator=(ClConv2d &&) = default; - /** Set the src and dst tensors. - * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:------------------|:------|:--------------| - * |F16 |F16 |F16 |F16 | - * |F32 |F32 |F32 |F32 | - * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | - * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of srcs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Same as @p src, except for src of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] dst Destination tensor info. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts. - * Data types supported: Same as @p src. - * @param[in] conv2d_info Contains convolution 2d info described in @ref Conv2dInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p src. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info = WeightsInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref ClConv2d - * - * Similar to ClConv2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info = WeightsInfo()); - /** Static function to check if given info will return the convolution called by @ref ClConv2d - * - * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of srcs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED. - * @param[in] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts. - * Data types supported: Same as @p src. - * @param[in] conv2d_info Contains convolution 2d info described in @ref Conv2dInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. - * @param[in] gpu_target Specifies the @p GPUTarget. - * - * @return the Convolution Method Hint - */ - static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - std::unique_ptr<IClOperator> _operator; - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCONV2D_H */ diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp deleted file mode 100644 index 0d2f2925d3..0000000000 --- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClConvertFullyConnectedWeights::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout) -{ - auto k = std::make_unique<kernels::ClConvertFullyConnectedWeightsKernel>(); - k->configure(compile_context, src, dst, original_src_shape, data_layout); - _kernel = std::move(k); -} - -Status ClConvertFullyConnectedWeights::validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout) -{ - return kernels::ClConvertFullyConnectedWeightsKernel::validate(src, dst, original_src_shape, data_layout); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h deleted file mode 100644 index 7ea35c5a8a..0000000000 --- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_CONVERT_FULLYCONNECTED_WEIGHTS_H -#define ARM_COMPUTE_CL_CONVERT_FULLYCONNECTED_WEIGHTS_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClConvertFullyConnectedWeightsKernel */ -class ClConvertFullyConnectedWeights : public IClOperator -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. - * @param[in] dst The dst tensor info. Data types supported: Same as @p src - * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClConvertFullyConnectedWeights::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CONVERT_FULLYCONNECTED_WEIGHTS_H */ diff --git a/src/runtime/gpu/cl/operators/ClCopy.cpp b/src/runtime/gpu/cl/operators/ClCopy.cpp deleted file mode 100644 index 2bdb1f5ba1..0000000000 --- a/src/runtime/gpu/cl/operators/ClCopy.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClCopy.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCopyKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCopy::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window) -{ - auto k = std::make_unique<kernels::ClCopyKernel>(); - k->configure(compile_context, src, dst, dst_window); - _kernel = std::move(k); -} - -Status ClCopy::validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window) -{ - return kernels::ClCopyKernel::validate(src, dst, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClCopy.h b/src/runtime/gpu/cl/operators/ClCopy.h deleted file mode 100644 index e8ea8125eb..0000000000 --- a/src/runtime/gpu/cl/operators/ClCopy.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_COPY_H -#define ARM_COMPUTE_CL_COPY_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCopyKernel */ -class ClCopy : public IClOperator -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: All. - * @param[out] dst Output tensor info. Data types supported: Same as @p src. - * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClCopy::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_COPY_H */ diff --git a/src/runtime/gpu/cl/operators/ClCrop.cpp b/src/runtime/gpu/cl/operators/ClCrop.cpp deleted file mode 100644 index 17bb11912f..0000000000 --- a/src/runtime/gpu/cl/operators/ClCrop.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClCrop.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCropKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCrop::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, - Window *dst_window) -{ - auto k = std::make_unique<kernels::ClCropKernel>(); - k->configure(compile_context, src, dst, start, end, batch_index, extrapolation_value, dst_window); - _kernel = std::move(k); -} - -Status ClCrop::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window) -{ - return kernels::ClCropKernel::validate(src, dst, start, end, batch_index, extrapolation_value, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClCrop.h b/src/runtime/gpu/cl/operators/ClCrop.h deleted file mode 100644 index cca69d6d77..0000000000 --- a/src/runtime/gpu/cl/operators/ClCrop.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_CROP_H -#define ARM_COMPUTE_CL_CROP_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCropKernel */ -class ClCrop : public IClOperator -{ -public: - /** Initialise the function's source and destination. - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC. - * @param[out] dst Destination tensor info. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *dst_window = nullptr); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClCrop::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *dst_window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CROP_H */ diff --git a/src/runtime/gpu/cl/operators/ClDequantize.cpp b/src/runtime/gpu/cl/operators/ClDequantize.cpp deleted file mode 100644 index 0c1391bb45..0000000000 --- a/src/runtime/gpu/cl/operators/ClDequantize.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClDequantize.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClDequantizeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClDequantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClDequantizeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClDequantize::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClDequantizeKernel::validate(src, dst); -} - -void ClDequantize::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClDequantize.h b/src/runtime/gpu/cl/operators/ClDequantize.h deleted file mode 100644 index 5bcdcb2113..0000000000 --- a/src/runtime/gpu/cl/operators/ClDequantize.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_DEQUANTIZE_H -#define ARM_COMPUTE_CL_DEQUANTIZE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClDequantizeKernel that dequantizes an input tensor */ -class ClDequantize : public IClOperator -{ -public: - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] dst Destination tensor info with the same dimensions of @p src. Data type supported: F16/F32. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClDequantize::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); - - // Inherited method overridden - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_DEQUANTIZE_H */ diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp deleted file mode 100644 index 13ef42a640..0000000000 --- a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClActivationKernel.h" -#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -namespace -{ -ITensorPack select_activation_src_dst(ITensorPack &tensors) -{ - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_tensor(TensorType::ACL_DST)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST)); - return pack; -} -} // namespace - -void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - - // Configure direct convolution kernel - const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo(); - auto k = std::make_unique<kernels::ClDirectConv2dKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info); - _direct_conv_kernel = std::move(k); - - // Configure border handler - PixelValue zero_value(0.f); - if(is_data_type_quantized_asymmetric(src->data_type())) - { - zero_value = PixelValue(0, src->data_type(), src->quantization_info()); - } - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, src, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value); - _src_border_handler = std::move(b); - - // Fused activation is currently supported for NHWC and floating point types - if(act_info.enabled() && !conv2d_act_info.enabled()) - { - auto a = std::make_unique<kernels::ClActivationKernel>(); - a->configure(compile_context, dst, dst, act_info); - _activation_kernel = std::move(a); - } - - // Tune kernels - CLScheduler::get().tune_kernel_static(*_direct_conv_kernel); -} - -Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), CLScheduler::get().target())); - if(act_info.enabled()) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info)); - } - return Status{}; -} - -void ClDirectConv2d::run(ITensorPack &tensors) -{ - // Run border handler - CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false); - // Run direct convolution - CLScheduler::get().enqueue_op(*_direct_conv_kernel.get(), tensors, false); - // Run activation kernel - if(_activation_kernel) - { - auto act_pack = select_activation_src_dst(tensors); - CLScheduler::get().enqueue_op(*_activation_kernel.get(), act_pack, false); - } -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h deleted file mode 100644 index a2785b52e3..0000000000 --- a/src/runtime/gpu/cl/operators/ClDirectConv2d.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H -#define ARM_COMPUTE_CL_DIRECT_CONV2D_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a directly convolution layer. This function calls the following OpenCL kernels: - * - * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClDirectConv2d - */ -class ClDirectConv2d : public IClOperator -{ -public: - ClDirectConv2d() = default; - /** Set the src and dst tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of srcs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts. - * Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClDirectConv2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<IClKernel> _direct_conv_kernel{ nullptr }; - std::unique_ptr<IClKernel> _src_border_handler{ nullptr }; - std::unique_ptr<IClKernel> _activation_kernel{ nullptr }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp deleted file mode 100644 index e5b836a0d8..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h" - -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseDivision::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, src1, src2, dst, act_info); -} - -void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseMax::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, src1, src2, dst, act_info); -} - -void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseMin::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, src1, src2, dst, act_info); -} - -void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseSquaredDiff::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info); -} - -void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwisePower::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, src1, src2, dst, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h deleted file mode 100644 index c01b107d97..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H -#define ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for division - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an arithmetic division between two tensors. - */ -class ClElementwiseDivision : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClElementwiseDivision::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for max - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a max operation between two tensors. - */ -class ClElementwiseMax : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClElementwiseMax::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for min - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a max operation between two tensors. - */ -class ClElementwiseMin : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClElementwiseMin::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for squared difference - * - * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32. - * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 - */ -class ClElementwiseSquaredDiff : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClElementwiseSquaredDiff::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for power - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) - */ -class ClElementwisePower : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported:F16/F32. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClElementwisePower::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H */ diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp b/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp deleted file mode 100644 index 7b830a077f..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClElementwiseUnary.h" - -#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClRsqrt::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::RSQRT); - _kernel = std::move(k); -} - -Status ClRsqrt::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::RSQRT); -} - -void ClExp::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::EXP); - _kernel = std::move(k); -} - -Status ClExp::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::EXP); -} - -void ClNeg::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::NEG); - _kernel = std::move(k); -} - -Status ClNeg::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::NEG); -} - -void ClSin::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::SIN); - _kernel = std::move(k); -} - -Status ClSin::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::SIN); -} - -void ClAbs::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::ABS); - _kernel = std::move(k); -} - -Status ClAbs::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ABS); -} - -void ClLog::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::LOG); - _kernel = std::move(k); -} - -Status ClLog::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOG); -} - -void ClRound::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::ROUND); - _kernel = std::move(k); -} - -Status ClRound::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ROUND); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h b/src/runtime/gpu/cl/operators/ClElementwiseUnary.h deleted file mode 100644 index b9acf6f5b8..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H -#define ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to perform inverse square root on an src tensor. */ -class ClRsqrt : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClRsqrt::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to perform exponential on an src tensor. */ -class ClExp : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClExp::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to negate an src tensor. */ -class ClNeg : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClNeg::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to calculate sine of an src tensor. */ -class ClSin : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClSin::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to perform elementwise log on an src tensor. */ -class ClLog : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClLog::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to get the absolute value of an src tensor. */ -class ClAbs : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClAbs::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to get the round (to the nearest even) value of an src tensor. */ -class ClRound : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClRound::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H */ diff --git a/src/runtime/gpu/cl/operators/ClFill.cpp b/src/runtime/gpu/cl/operators/ClFill.cpp deleted file mode 100644 index 4d0afaef24..0000000000 --- a/src/runtime/gpu/cl/operators/ClFill.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFill.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClFillKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFill::configure(const ClCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window) -{ - auto k = std::make_unique<kernels::ClFillKernel>(); - k->configure(compile_context, tensor, constant_value, dst_window); - _kernel = std::move(k); -} - -Status ClFill::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window) -{ - return kernels::ClFillKernel::validate(tensor, constant_value, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClFill.h b/src/runtime/gpu/cl/operators/ClFill.h deleted file mode 100644 index cc79b915a7..0000000000 --- a/src/runtime/gpu/cl/operators/ClFill.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FILL_H -#define ARM_COMPUTE_CL_FILL_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClFillKernel */ -class ClFill : public IClOperator -{ -public: - /** Initialise the kernel's tensor and filling value - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Source tensor info. Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClFill::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FILL_H */ diff --git a/src/runtime/gpu/cl/operators/ClFlatten.cpp b/src/runtime/gpu/cl/operators/ClFlatten.cpp deleted file mode 100644 index 060b653dee..0000000000 --- a/src/runtime/gpu/cl/operators/ClFlatten.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFlatten.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClReshapeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFlatten::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClReshapeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClFlatten::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClReshapeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClFlatten.h b/src/runtime/gpu/cl/operators/ClFlatten.h deleted file mode 100644 index 8bd619b518..0000000000 --- a/src/runtime/gpu/cl/operators/ClFlatten.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FLATTEN_H -#define ARM_COMPUTE_CL_FLATTEN_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to flatten a given input */ -class ClFlatten : public IClOperator -{ -public: - /** Configure operator for a given list of arguments - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src |dst | - * |:--------------|:--------------| - * |All |All | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[in] dst Destination tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. - * Data type supported: same as @p src - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClFlatten::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FLATTEN_H */ diff --git a/src/runtime/gpu/cl/operators/ClFloor.cpp b/src/runtime/gpu/cl/operators/ClFloor.cpp deleted file mode 100644 index 94e77c0c54..0000000000 --- a/src/runtime/gpu/cl/operators/ClFloor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFloor.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClFloorKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFloor::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClFloorKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClFloor::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClFloorKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClFloor.h b/src/runtime/gpu/cl/operators/ClFloor.h deleted file mode 100644 index 90bdee6c7e..0000000000 --- a/src/runtime/gpu/cl/operators/ClFloor.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FLOOR_H -#define ARM_COMPUTE_CL_FLOOR_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClFloorKernel */ -class ClFloor : public IClOperator -{ -public: - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data type supported: same as @p src - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClFloor::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FLOOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClFullyConnected.cpp b/src/runtime/gpu/cl/operators/ClFullyConnected.cpp deleted file mode 100644 index 377168d864..0000000000 --- a/src/runtime/gpu/cl/operators/ClFullyConnected.cpp +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFullyConnected.h" - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" - -#include "src/core/helpers/MemoryHelpers.h" -#include "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h" -#include "src/runtime/gpu/cl/operators/ClFlatten.h" -#include "src/runtime/gpu/cl/operators/ClGemm.h" -#include "src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h" -#include "src/runtime/gpu/cl/operators/ClTranspose.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" - -#include "support/Cast.h" - -#include <algorithm> - -namespace arm_compute -{ -namespace opencl -{ -using namespace arm_compute::experimental; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status construct_gemmlowp_output_stage(const ITensorInfo &src, const ITensorInfo &weights, const ITensorInfo &dst, - GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info) -{ - gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage.gemmlowp_offset = 0; - gemmlowp_output_stage.gemmlowp_multiplier = 0; - gemmlowp_output_stage.gemmlowp_shift = 0; - - const auto data_type = src.data_type(); - - // Configure output stage for quantized case - if(is_data_type_quantized_asymmetric(data_type)) - { - const QuantizationInfo oq_info = dst.quantization_info(); - const UniformQuantizationInfo iq_unif = src.quantization_info().uniform(); - const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform(); - const UniformQuantizationInfo oq_unif = oq_info.uniform(); - - const auto output_quant_info = (dst.total_size() == 0) ? iq_unif : oq_unif; - - const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale; - int output_multiplier = 0; - int output_shift = 0; - ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); - - PixelValue type_min{}; - PixelValue type_max{}; - std::tie(type_min, type_max) = get_min_max(data_type); - - if(activation_info.enabled()) - { - std::tie(type_min, type_max) = get_quantized_activation_min_max(activation_info, data_type, output_quant_info); - } - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; - gemmlowp_output_stage.gemmlowp_shift = output_shift; - gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier); - gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift); - type_min.get(gemmlowp_output_stage.gemmlowp_min_bound); - type_max.get(gemmlowp_output_stage.gemmlowp_max_bound); - } - - return Status{}; -} - -Status validate_mm(const ITensorInfo &src, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &dst, const FullyConnectedLayerInfo &fc_info) -{ - GEMMLowpOutputStageInfo gemmlowp_output_stage; - ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(src, weights, dst, gemmlowp_output_stage, fc_info.activation_info)); - - const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - 0, // depth_output_gemm3d - false, // reinterpret_input_as_3d - fc_info.retain_internal_weights, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - fc_info.fp_mixed_precision, // fp_mixed_precision - false, // fast_math - true, // broadcast_bias - ActivationLayerInfo()); // activation_info - - if(is_data_type_quantized_asymmetric(src.data_type())) - { - const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); - - // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() - // Extract and negate src and weights offset - const QuantizationInfo src_quantization_info(iq_info.scale, -iq_info.offset); - const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset); - - // Validate gemmlowp function - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyCore::validate(&src.clone()->set_quantization_info(src_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), - bias, - &dst, - gemm_info)); - } - else - { - ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&src, &weights, bias, &dst, 1.f, 1.f, gemm_info)); - } - - return Status{}; -} -} // namespace - -ClFullyConnected::ClFullyConnected() - : _convert_weights(nullptr), - _flatten(nullptr), - _reshape_weights(nullptr), - _mm_gemm(nullptr), - _mm_gemmlowp(nullptr), - _aux_mem(Count) -{ -} - -ClFullyConnected::~ClFullyConnected() = default; - -void ClFullyConnected::configure_mm(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, - const FullyConnectedLayerInfo &fc_info) -{ - GEMMLowpOutputStageInfo gemmlowp_output_stage; - construct_gemmlowp_output_stage(*src, *weights, *dst, gemmlowp_output_stage, fc_info.activation_info); - - const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - 0, // depth_output_gemm3d - false, // reinterpret_input_as_3d - fc_info.retain_internal_weights, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - fc_info.fp_mixed_precision, // fp_mixed_precision - false, // fast_math - true, // broadcast_bias - fc_info.activation_info, // activation_info - fc_info.constant_weights); // constant_weights - - if(_is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo src_quantization_info = src->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->quantization_info(); - - TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info); - TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info); - - src_info.set_quantization_info(QuantizationInfo(src_quantization_info.uniform().scale, -src_quantization_info.uniform().offset)); - weights_info.set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - // Configure gemmlowp function - _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>(); - _mm_gemmlowp->configure(compile_context, &src_info, &weights_info, bias, dst, gemm_info); - } - else - { - // Configure matrix multiply kernel - _mm_gemm = std::make_unique<ClGemm>(); - _mm_gemm->configure(compile_context, src, weights, bias, dst, 1.f, 1.f, gemm_info); - } -} - -void ClFullyConnected::configure_conv_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, - const FullyConnectedLayerInfo &fc_info) -{ - ARM_COMPUTE_ERROR_ON((weights->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2)))); - - // If the fully connected layer is called after a convolution layer, the input tensor must be linearized - - // Initialize output tensor for flatten - _flattened_src = src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(src)).set_data_layout(DataLayout::NCHW); - - // Configure flatten kernel - _flatten = std::make_unique<ClFlatten>(); - _flatten->configure(compile_context, src, &_flattened_src); - - // Configure matrix multiply kernel - configure_mm(compile_context, &_flattened_src, weights, bias, dst, fc_info); -} - -void ClFullyConnected::configure_fc_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, - const FullyConnectedLayerInfo &fc_info) -{ - ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension(1)); - - // Configure matrix multiply kernel - configure_mm(compile_context, src, weights, bias, dst, fc_info); -} - -void ClFullyConnected::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(ClFullyConnected::validate(src, weights, biases, dst, fc_info)); - - _are_weights_converted = true; - _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - _is_fc_after_conv = true; - _is_quantized = is_data_type_quantized_asymmetric(src->data_type()); - _is_prepared = fc_info.retain_internal_weights; - _weights_to_use = TensorInfo(*weights); - _weights_to_use_idx = ACL_SRC_1; - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = dst->dimension(1) > 1; - if(is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(src->tensor_shape().cbegin() + 3, - src->tensor_shape().cend(), - dst->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = src->num_dimensions() > 1; - } - - ITensorInfo *weights_used = weights; - - // Reshape weights if needed - if(!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights = std::make_unique<ClTranspose>(); - _reshape_weights->configure(compile_context, weights, &_reshaped_weights); - weights_used = &_reshaped_weights; - _weights_to_use_idx = offset_int_vec(TransposedWeights); - } - - // Convert weights if needed - if(_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout)) - { - // Convert weights - _convert_weights = std::make_unique<ClConvertFullyConnectedWeights>(); - _convert_weights->configure(compile_context, - weights_used, - &_converted_weights, - src->tensor_shape(), - fc_info.weights_trained_layout); - - weights_used = &_converted_weights; - _weights_to_use_idx = offset_int_vec(ConvertedWeights); - _are_weights_converted = false; - } - - if(_is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - configure_conv_fc(compile_context, src, weights_used, biases, dst, fc_info); - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - configure_fc_fc(compile_context, src, weights_used, biases, dst, fc_info); - } - // Update TensorInfo of final weights used (Need to be done in the end due to padding expansion) - _weights_to_use = *weights_used; - - // Set auxiliary memory requirements - auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace(); - for(unsigned int i = 0; i < gemm_mem_req.size(); ++i) - { - _aux_mem[i] = gemm_mem_req[i]; - } - if(_aux_mem[1].size > 0 || _aux_mem[2].size > 0) // Persistent weights memory on GEMMs - { - // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch - _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), MemoryLifetime::Prepare, _reshaped_weights.total_size()); - _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Prepare, _converted_weights.total_size()); - } - else - { - // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch - const auto transposed_wei_lft = (_weights_to_use_idx == offset_int_vec(TransposedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare; - const auto converted_wei_lft = (_weights_to_use_idx == offset_int_vec(ConvertedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare; - - _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), transposed_wei_lft, _reshaped_weights.total_size()); - _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), converted_wei_lft, _converted_weights.total_size()); - } - _aux_mem[FlattenedSrc] = MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size()); -} - -Status ClFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU - && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); - ARM_COMPUTE_RETURN_ERROR_ON(!fc_info.constant_weights && (!fc_info.are_weights_reshaped || fc_info.transpose_weights)); - - bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - bool is_fc_after_conv = true; - - const ITensorInfo &flatten_src = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(src)).set_data_layout(DataLayout::NCHW)); - const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone()); - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *src_to_use = src; - const ITensorInfo *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = dst->dimension(1) > 1; - if(is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(src->tensor_shape().cbegin() + 3, - src->tensor_shape().cend(), - dst->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = src->num_dimensions() > 1; - } - - if(!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClTranspose::validate(weights, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - if(is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout)) - { - // Validate convert weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClConvertFullyConnectedWeights::validate(weights_to_use, - &converted_weights, - src->tensor_shape(), - fc_info.weights_trained_layout)); - weights_to_use = &converted_weights; - } - - if(is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2)))); - - // Validate flatten kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClFlatten::validate(src, &flatten_src)); - src_to_use = &flatten_src; - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1)); - } - - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*src_to_use, *weights_to_use, biases, *dst, fc_info)); - - return Status{}; -} - -void ClFullyConnected::run(ITensorPack &tensors) -{ - prepare(tensors); - - auto src = tensors.get_const_tensor(ACL_SRC_0); - - CLAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false); - CLAuxTensorHandler weights(_weights_to_use_idx, _weights_to_use, tensors, false); - - // Linearize input if it comes from a convolutional layer - if(_is_fc_after_conv) - { - ITensorPack flatten_pack{ { ACL_SRC, src }, { ACL_DST, flattened_src.get() } }; - _flatten->run(flatten_pack); - } - - ITensorPack gemm_pack = tensors; - gemm_pack.add_const_tensor(ACL_SRC_0, (_is_fc_after_conv) ? flattened_src.get() : src); - if(_weights_to_use_idx != ACL_SRC_1) - { - gemm_pack.add_const_tensor(ACL_SRC_1, weights.get()); - } - - // Run matrix multiply - if(_is_quantized) - { - _mm_gemmlowp->run(gemm_pack); - } - else - { - _mm_gemm->run(gemm_pack); - } -} - -void ClFullyConnected::prepare(ITensorPack &tensors) -{ - if(!_is_prepared) - { - auto weights = tensors.get_const_tensor(ACL_SRC_1); - - CLAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false); - CLAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false); - - // Pointer to current weights - const ITensor *cur_weights = weights; - - // Reshape of the weights if needed (happens only once) - if(!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - ITensorPack transpose_pack{ { ACL_SRC, weights }, { ACL_DST, reshaped_weights.get() } }; - _reshape_weights->run(transpose_pack); - - cur_weights->mark_as_unused(); - cur_weights = reshaped_weights.get(); - - _are_weights_reshaped = true; - } - - // Convert weights if needed (happens only once) - if(!_are_weights_converted) - { - ITensorPack convert_pack{ { ACL_SRC, cur_weights }, { ACL_DST, converted_weights.get() } }; - _convert_weights->run(convert_pack); - - cur_weights->mark_as_unused(); - cur_weights = converted_weights.get(); - - _are_weights_converted = true; - } - - tensors.add_const_tensor(ACL_SRC_1, cur_weights); - - // Prepare GEMM prepare and release unused weights - if(!_is_quantized) - { - _mm_gemm->prepare(tensors); - } - else - { - _mm_gemmlowp->prepare(tensors); - } - _is_prepared = true; - } -} - -experimental::MemoryRequirements ClFullyConnected::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClFullyConnected.h b/src/runtime/gpu/cl/operators/ClFullyConnected.h deleted file mode 100644 index 86f95756d5..0000000000 --- a/src/runtime/gpu/cl/operators/ClFullyConnected.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FULLY_CONNECTED_H -#define ARM_COMPUTE_CL_FULLY_CONNECTED_H - -#include "arm_compute/core/TensorInfo.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -// Forward declarations -class ClConvertFullyConnectedWeights; -class ClFlatten; -class ClGemm; -class ClGemmLowpMatrixMultiplyCore; -class ClTranspose; - -/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels: - * - * -# @ref opencl::kernels::ClIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) - * -# @ref opencl::kernels::ClGemmMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric) - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class ClFullyConnected : public IClOperator -{ -public: - ClFullyConnected(); - ~ClFullyConnected(); - /** Set the input and output tensors. - * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:------------------|:------|:--------------| - * |F16 |F16 |F16 |F16 | - * |F32 |F32 |F32 |F32 | - * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p src. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p src. - * @param[out] dst Destination tensor. Its shape should be equal to the output of a matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. - * Data type supported: Same as @p src. - * @param[in] fc_info (Optional) Fully connected layer additional info - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClFullyConnected::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - - // Inherited methods overriden - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - void configure_fc_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info); - void configure_conv_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info); - void configure_mm(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info); - -private: - enum AuxTensorIdx - { - TransposedWeights = 10, - ConvertedWeights = 11, - FlattenedSrc = 12, - Count = 13 - }; - - std::unique_ptr<ClConvertFullyConnectedWeights> _convert_weights; - std::unique_ptr<ClFlatten> _flatten; - std::unique_ptr<ClTranspose> _reshape_weights; - std::unique_ptr<ClGemm> _mm_gemm; - std::unique_ptr<ClGemmLowpMatrixMultiplyCore> _mm_gemmlowp; - - experimental::MemoryRequirements _aux_mem{}; - - TensorInfo _flattened_src{}; - TensorInfo _converted_weights{}; - TensorInfo _reshaped_weights{}; - - TensorInfo _weights_to_use{}; - int _weights_to_use_idx{ ACL_SRC_1 }; - - bool _are_weights_converted{ true }; - bool _are_weights_reshaped{ true }; - bool _is_fc_after_conv{ true }; - bool _is_quantized{ false }; - bool _is_prepared{ false }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FULLY_CONNECTED_H */ diff --git a/src/runtime/gpu/cl/operators/ClGemm.cpp b/src/runtime/gpu/cl/operators/ClGemm.cpp deleted file mode 100644 index 59bbabba26..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemm.cpp +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClGemm.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Log.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/ITensorAllocator.h" - -#include "src/common/utils/Log.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/core/utils/helpers/float_ops.h" -#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" -#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" - -#include "support/Cast.h" -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace opencl -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::cl_gemm; -using namespace arm_compute::experimental; -using namespace arm_compute::utils::cast; -using namespace arm_compute::opencl::kernels; - -namespace -{ -inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type) -{ - switch(kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - case CLGEMMKernelType::RESHAPED_V1: - case CLGEMMKernelType::RESHAPED: - { - return true; - } - default: - { - return false; - } - } -} -//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type -inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights) -{ - if(!constant_weights) - { - return CLGEMMKernelType::NATIVE_V1; - } - - auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run); - if(bool(gemm_kernel)) - { - if(validate_gemm_kernel(gemm_kernel.gemm_type)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; - } - } - gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; -} -// Validate lhs_info and rhs_info for reshaped only rhs kernel -inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel - TensorInfo tmp_b_info{}; - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info))) - { - return false; - } - // Validate mm kernel - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - gemm_kernel_info.has_pad_y = false; - if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - gemm_kernel_info.has_pad_y = true; - if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - return true; -} - -//Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs -inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, - const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query); - if(config) - { - if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} - -// Validate lhs_info and rhs_info for reshaped kernel -inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Validate reshape LHS kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d))); - if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d))) - { - return false; - } - - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info))) - { - return false; - } - // Validate mm kernel - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - return true; -} - -//Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs -inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query); - if(config) - { - if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_reshaped(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} -} // namespace - -ClGemm::ClGemm() - : _mm_kernel(std::make_unique<ClGemmMatrixMultiplyKernel>()), - _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()), - _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()), - _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()), - _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()), - _mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()), - _tmp_a(), - _tmp_b(), - _reshape_b_only_on_first_run(false), - _gemm_kernel_type(CLGEMMKernelType::NATIVE_V1), - _is_prepared(false), - _aux_mem(AuxTensorIdx::Count) -{ -} - -void ClGemm::configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const GPUTarget gpu_target = CLScheduler::get().target(); - - // Set the target for the kernels - _mm_kernel->set_target(gpu_target); - - GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias()); - - // Configure and tune matrix multiply kernel - _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); - - // Tune kernel statically - CLScheduler::get().tune_kernel_static(*_mm_kernel); -} - -void ClGemm::configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - int mult_transpose1xW_width = 1; - int mult_interleave4x4_height = 1; - - // Set the target for the kernels - _reshape_lhs_kernel->set_target(gpu_target); - _mm_kernel->set_target(gpu_target); - - if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST) - { - mult_transpose1xW_width = 4; - mult_interleave4x4_height = 2; - } - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = 16 / b->element_size(); - rhs_info.k0 = 1; - rhs_info.h0 = mult_transpose1xW_width; - rhs_info.interleave = false; - rhs_info.transpose = false; - - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = 4; - lhs_info.k0 = 4; - lhs_info.v0 = mult_interleave4x4_height; - lhs_info.interleave = true; - lhs_info.transpose = true; - - GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias()); - - // Configure interleave kernel - _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d); - - // Configure transpose kernel - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure and tune matrix multiply kernel - _mm_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); - - CLScheduler::get().tune_kernel_static(*_mm_kernel); - - // Request memory for LHS and RHS reshape matrix - _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size()); - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -void ClGemm::configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - // Set the target for the kernels - _reshape_lhs_kernel->set_target(gpu_target); - _mm_kernel->set_target(gpu_target); - - GEMMLHSMatrixInfo lhs_info{}; - GEMMRHSMatrixInfo rhs_info{}; - - // Pick up the GEMM configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, - c, output, gemm_info.reinterpret_input_as_3d()); - - _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure and tune matrix multiply kernel - _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Request memory for LHS and RHS reshape matrix - _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size()); - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - // Set the target for the kernels - _mm_kernel->set_target(gpu_target); - - GEMMLHSMatrixInfo lhs_info{}; - GEMMRHSMatrixInfo rhs_info{}; - - // Pick up the GEMM configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output); - - // Transpose matrix - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true) - // During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have - // pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false - - // Configure matrix multiply kernel with no y padding support - kernel_info.has_pad_y = false; - _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Configure matrix multiply kernel with y padding support - kernel_info.has_pad_y = true; - _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Request memory for RHS reshape matrix - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -Status ClGemm::validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, gemm_info.broadcast_bias()); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(a, b, c, output, alpha, beta, - false, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info())); - - return Status{}; -} - -Status ClGemm::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - int mult_transpose1xW_width = 1; - int mult_interleave4x4_height = 1; - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST) - { - mult_transpose1xW_width = 4; - mult_interleave4x4_height = 2; - } - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = 16 / b->element_size(); - rhs_info.k0 = 1; - rhs_info.h0 = mult_transpose1xW_width; - rhs_info.interleave = false; - rhs_info.transpose = false; - - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = 4; - lhs_info.k0 = 4; - lhs_info.v0 = mult_interleave4x4_height; - lhs_info.interleave = true; - lhs_info.transpose = true; - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias()); - - // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); - - // Validate transpose kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, - true, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info())); - - return Status{}; -} - -Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - GEMMLHSMatrixInfo lhs_info; - GEMMRHSMatrixInfo rhs_info; - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); - lhs_info = gemm_config.lhs_info; - rhs_info = gemm_config.rhs_info; - - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); - - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - return Status{}; -} - -Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - const DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - GEMMLHSMatrixInfo lhs_info; - GEMMRHSMatrixInfo rhs_info; - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); - lhs_info = gemm_config.lhs_info; - rhs_info = gemm_config.rhs_info; - - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - kernel_info.has_pad_y = false; - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - kernel_info.has_pad_y = true; - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - return Status{}; -} - -void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info)); - - // Check if we need to reshape the matrix B only on the first run - _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _is_prepared = gemm_info.retain_internal_weights(); - - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - - // Select GEMMType - _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run, - gemm_info.constant_weights()); - - const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); - - ITensorInfo *c_to_use = fuse_add_c ? c : nullptr; - - switch(_gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - configure_native_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - { - configure_reshaped_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED: - { - configure_reshaped_v2(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - default: - { - ARM_COMPUTE_ERROR("GEMMType not supported"); - } - } -} - -Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - // Get the GPU target - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - - // Select GEMMType - CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery - { - CLScheduler::get().target(), a->data_type(), m, n, k, batch_size, - }, - gemm_info.reshape_b_only_on_first_run(), gemm_info.constant_weights()); - - const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); - - const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr; - - switch(gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - default: - { - ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported"); - } - } - - return Status{}; -} - -void ClGemm::run(ITensorPack &tensors) -{ - const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0); - const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1); - const ITensor *src2 = tensors.get_const_tensor(ACL_SRC_2); - ITensor *dst = tensors.get_tensor(ACL_DST); - - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst); - - CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true); - CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true); - - // Prepare the consts if needed - prepare(tensors); - - // Run matrix multiply kernel - switch(_gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - CLScheduler::get().enqueue_op(*_mm_kernel, tensors, true); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - case CLGEMMKernelType::RESHAPED: - { - // Run interleave kernel - ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false); - - if(!_reshape_b_only_on_first_run) - { - // Run transpose kernel - ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false); - } - - ITensorPack gemm_reshaped_pack{ { ACL_SRC_0, lhs_reshaped.get() }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } }; - - if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED) - { - CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true); - } - else - { - CLScheduler::get().enqueue_op(*_mm_kernel, gemm_reshaped_pack, true); - } - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - if(!_reshape_b_only_on_first_run) - { - // Run transpose kernel - ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false); - } - // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement - // Check if the lhs or dst tensors have padding - const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom; - const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom; - bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0); - - ITensorPack gemm_reshaped_onlyrhs_pack{ { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } }; - if(has_pad_y) - { - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true); - } - else - { - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true); - } - break; - } - default: - { - ARM_COMPUTE_ERROR("GEMMType not supported"); - } - } -} - -void ClGemm::prepare(ITensorPack &constants) -{ - if(!_is_prepared) - { - const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1); - ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape))); - - // If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed - if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux) - { - ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Transforming RHS Matrix!"); - - CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux); - ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr); - - ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true); - } - _is_prepared = true; - } -} - -experimental::MemoryRequirements ClGemm::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClGemm.h b/src/runtime/gpu/cl/operators/ClGemm.h deleted file mode 100644 index 254344e862..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemm.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_GEMM_H -#define ARM_COMPUTE_CL_GEMM_H - -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTypes.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels: - * - * -# @ref kernels::ClGemmReshapeLhsMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model) - * -# @ref kernels::ClGemmReshapeRhsMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - */ -class ClGemm : public IClOperator -{ -public: - /** Constructor */ - ClGemm(); - /** Initialise the kernel's inputs and output - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:------------|:-----------|:---------|:--------------| - * |F32 |F32 |F32 |F32 | - * |F16 |F16 |F16 |F16 | - * - * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. - * - * @note All tensors must have the same data type. - * - * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix - * - * @param[in] compile_context The compile context to be used. - * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32 - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping - * in case matrix A and matrix B have been already transformed. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClGemm::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; - experimental::MemoryRequirements workspace() const override; - -private: - void configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - - static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - -private: - enum AuxTensorIdx - { - LhsReshape = 0, - RhsReshape, - Count - }; - -private: - std::unique_ptr<kernels::ClGemmMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<kernels::ClGemmReshapeLhsMatrixKernel> _reshape_lhs_kernel; - std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _reshape_rhs_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedKernel> _mm_reshaped_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_fallback_kernel; - TensorInfo _tmp_a; - TensorInfo _tmp_b; - bool _reshape_b_only_on_first_run; - CLGEMMKernelType _gemm_kernel_type; - bool _is_prepared; - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMM_H */ diff --git a/src/runtime/gpu/cl/operators/ClGemmConv2d.cpp b/src/runtime/gpu/cl/operators/ClGemmConv2d.cpp deleted file mode 100644 index 8c796e0712..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmConv2d.cpp +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClGemmConv2d.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/kernels/ClActivationKernel.h" -#include "src/core/gpu/cl/kernels/ClCol2ImKernel.h" -#include "src/core/gpu/cl/kernels/ClIm2ColKernel.h" -#include "src/core/gpu/cl/kernels/ClWeightsReshapeKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/runtime/gpu/cl/operators/ClGemm.h" -#include "src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" -#include "support/Cast.h" - -namespace arm_compute -{ -using namespace experimental; -using namespace misc::shape_calculator; -using namespace utils::cast; -namespace opencl -{ -ClGemmConv2d::ClGemmConv2d() - : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr), _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(), - _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _aux_mem(AuxTensorIdx::Count) -{ -} -ClGemmConv2d::~ClGemmConv2d() = default; - -void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights); - ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info)); - - const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - gemm_3d_depth, // depth_output_gemm3d - _skip_im2col, // reinterpret_input_as_3d - false, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - false, // fast_math - false, // fp_mixed_precision - true, // broadcast_bias - act_info); // activation_info - - TensorInfo tmp_src{ *src }; - if(_is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = src->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->quantization_info(); - - tmp_src.set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); - weights->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>(); - _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases, dst, gemm_info); - - // Revert back QuantizatioInfo as weights could be used in other convolution layers - weights->set_quantization_info(weights_quantization_info); - - auto mm_mem_req = _mm_gemmlowp->workspace(); - for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont) - { - _aux_mem[cont] = mm_mem_req[cont]; - } - } - else - { - // Configure matrix multiply function - _mm_gemm = std::make_unique<ClGemm>(); - _mm_gemm->configure(compile_context, &tmp_src, weights, biases, dst, 1.0f, 1.0f, gemm_info); - auto mm_mem_req = _mm_gemm->workspace(); - for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont) - { - _aux_mem[cont] = mm_mem_req[cont]; - } - } -} - -Status ClGemmConv2d::validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info) -{ - const bool is_quantized = is_data_type_quantized_asymmetric(src->data_type()); - - const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - gemm_3d_depth, // depth_output_gemm3d - skip_im2col, // reinterpret_input_as_3d - false, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - false, // fast_math - false, // fp_mixed_precision - true, // broadcast_bias - act_info); // activation_info - - if(is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = src->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->quantization_info(); - - std::unique_ptr<ITensorInfo> src_qa = src->clone(); - std::unique_ptr<ITensorInfo> weights_qa = weights->clone(); - src_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); - weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - // Perform validation step on GEMMLowp - return ClGemmLowpMatrixMultiplyCore::validate(src_qa.get(), weights_qa.get(), biases, dst, gemm_info); - } - else - { - // Perform validation step on Matrix multiply function - return ClGemm::validate(src, weights, biases, dst, 1.0f, 1.0f, gemm_info); - } -} - -void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); - - ARM_COMPUTE_ERROR_THROW_ON(ClGemmConv2d::validate(src, weights, biases, dst, - conv2d_info, - weights_info)); - - const DataType data_type = src->data_type(); - const DataLayout data_layout = src->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - - const unsigned int kernel_width = weights->dimension(idx_width); - const unsigned int kernel_height = weights->dimension(idx_height); - const unsigned int num_kernels = weights->dimension(idx_kernels); - - const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); - - _is_prepared = weights_info.retain_internal_weights(); - _is_quantized = is_data_type_quantized_asymmetric(src->data_type()); - _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1 && conv2d_info.conv_info.stride().second == 1); - _skip_col2im = data_layout == DataLayout::NHWC; - - // Only for quantize there are few cases where we cannot fuse the activation function in GEMM - _fuse_activation = true; - - const ITensorInfo *gemm_input_to_use = src; - ITensorInfo *gemm_output_to_use = dst; - - // Get parameters from conv_info - unsigned int stride_x = 0; - unsigned int stride_y = 0; - std::tie(stride_x, stride_y) = conv2d_info.conv_info.stride(); - - // Get convolved dimensions - unsigned int conv_w = 0; - unsigned int conv_h = 0; - std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width), - src->dimension(idx_height), - kernel_width, - kernel_height, - conv2d_info.conv_info, - conv2d_info.dilation); - - unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups; - - ITensorInfo *biases_to_use = biases; - _append_bias = false; - - _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>(); - if(conv2d_info.num_groups != 1 && biases != nullptr) - { - // num_groups != 1 can only be for NCHW - // Since it is missing an utility function to reshape the biases, we append the biases into the weights tensor - biases_to_use = nullptr; - _append_bias = true; - _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped, conv2d_info.num_groups); - } - else - { - _weights_reshape_kernel->configure(compile_context, weights, nullptr, &_weights_reshaped, conv2d_info.num_groups); - } - - // Create tensor to store im2col reshaped inputs - if(!_skip_im2col) - { - // Configure and tune im2col. im2col output shape is auto-initialized - _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>(); - - // Set the GPU target for im2col - _im2col_kernel->set_target(CLScheduler::get().target()); - _im2col_kernel->configure(compile_context, src, &_im2col_output, Size2D(kernel_width, kernel_height), conv2d_info.conv_info, _append_bias, conv2d_info.dilation, conv2d_info.num_groups); - - // Set quantization info - _im2col_output.set_quantization_info(src->quantization_info()); - CLScheduler::get().tune_kernel_static(*_im2col_kernel); - - // Update GEMM input - gemm_input_to_use = &_im2col_output; - } - - // Create GEMM output tensor - if(!_skip_col2im) - { - TensorShape shape_gemm; - - // If we cannot skip col2im it means we run im2col as well - shape_gemm = _im2col_output.tensor_shape(); - shape_gemm.set(0, mat_weights_cols); - shape_gemm.set(1, conv_w * conv_h); - - _gemm_output = TensorInfo(shape_gemm, 1, data_type); - _gemm_output.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout()); - - // Update GEMM output - gemm_output_to_use = &_gemm_output; - } - - GEMMLowpOutputStageInfo gemmlowp_output_stage; - gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage.gemmlowp_offset = 0; - - // Configure output stage for quantized case - if(_is_quantized) - { - const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info; - const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type()); - const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1; - - gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel; - - gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters); - gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters); - quantization::compute_quantized_multipliers_and_shifts(src, weights, dst, - gemmlowp_output_stage.gemmlowp_multipliers.data(), - gemmlowp_output_stage.gemmlowp_shifts.data()); - gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; - gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; - - PixelValue min_val{}; - PixelValue max_val{}; - std::tie(min_val, max_val) = get_min_max(dst->data_type()); - - auto min_activation = min_val.get<int32_t>(); - auto max_activation = max_val.get<int32_t>(); - - const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, - ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - }; - - if(conv2d_info.act_info.enabled()) - { - if(supported_acts.count(conv2d_info.act_info.activation()) != 0) - { - std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info); - } - else - { - _fuse_activation = false; - } - } - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_min_bound = min_activation; - gemmlowp_output_stage.gemmlowp_max_bound = max_activation; - } - - // Configure and tune GEMM - // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix - const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; - - configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info); - - if(!_skip_col2im) - { - // Set the GPU target for col2im - _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>(); - _col2im_kernel->set_target(CLScheduler::get().target()); - // Configure and tune Col2Im - _col2im_kernel->configure(compile_context, gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups); - CLScheduler::get().tune_kernel_static(*_col2im_kernel.get()); - } - - ARM_COMPUTE_ERROR_ON_MSG((dst->dimension(idx_width) != conv_w) || (dst->dimension(idx_height) != conv_h), - "Output shape does not match the expected one"); - - if(!_fuse_activation) - { - _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>(); - _activation_kernel->configure(compile_context, dst, nullptr, conv2d_info.act_info); - } - - _aux_mem[Im2ColOutput] = MemoryInfo(offset_int_vec(Im2ColOutput), MemoryLifetime::Temporary, _im2col_output.total_size()); - _aux_mem[WeightsReshaped] = MemoryInfo(offset_int_vec(WeightsReshaped), MemoryLifetime::Persistent, _weights_reshaped.total_size()); - _aux_mem[GemmOutput] = MemoryInfo(offset_int_vec(GemmOutput), MemoryLifetime::Temporary, _gemm_output.total_size()); -} - -Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->data_type()); - - if(!is_quantized_per_channel) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights); - } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8"); - ARM_COMPUTE_RETURN_ERROR_ON(((src->dimension(2) / weights->dimension(2)) != conv2d_info.num_groups) && (src->data_layout() == DataLayout::NCHW)); - - const DataLayout data_layout = src->data_layout(); - const DataType data_type = src->data_type(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const int idx_kernels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - - const unsigned int kernel_width = weights->dimension(idx_width); - const unsigned int kernel_height = weights->dimension(idx_height); - const unsigned int num_kernels = weights->dimension(idx_kernels); - - TensorInfo im2col_reshaped_info{}; - TensorInfo info_gemm{}; - TensorInfo weights_reshaped_info{}; - const ITensorInfo *gemm_input_to_use = src; - const ITensorInfo *gemm_output_to_use = dst; - const ITensorInfo *weights_to_use = weights; - const bool is_quantized = is_data_type_quantized_asymmetric(data_type); - const bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1 - && conv2d_info.conv_info.stride().second == 1); - const bool skip_col2im = data_layout == DataLayout::NHWC; - bool fuse_activation = true; - - ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * conv2d_info.num_groups) != src->dimension(idx_channel)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); - - // Validate biases - if(biases != nullptr) - { - if(is_quantized) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases); - } - ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(idx_kernels)); - ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); - } - - if(conv2d_info.act_info.enabled()) - { - ARM_COMPUTE_ERROR_ON(conv2d_info.act_info.b() > conv2d_info.act_info.a()); - } - - // Get convolved dimensions - unsigned int conv_w = 0; - unsigned int conv_h = 0; - - std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width), - src->dimension(idx_height), - kernel_width, - kernel_height, - conv2d_info.conv_info, - conv2d_info.dilation); - - unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups; - - const ITensorInfo *biases_to_use = biases; - bool append_bias = false; - - if(conv2d_info.num_groups != 1 && biases != nullptr) - { - // num_groups != 1 can only be for NCHW - // Since it is missing an utility function to reshape the biases, we append the biases into the weights tensor - biases_to_use = nullptr; - append_bias = true; - weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, true, conv2d_info.num_groups), 1, data_type); - } - else - { - weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, false, conv2d_info.num_groups), 1, data_type); - } - - weights_to_use = &weights_reshaped_info; - - if(!skip_im2col) - { - const Size2D kernel_dims(kernel_width, kernel_height); - - // Output tensor auto initialization if not yet initialized - TensorShape expected_output_shape = compute_im2col_conv_shape(src, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups == 1, conv2d_info.num_groups); - - auto_init_if_empty(im2col_reshaped_info, src->clone()->set_tensor_shape(expected_output_shape)); - - ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClIm2ColKernel::validate(src, &im2col_reshaped_info, kernel_dims, conv2d_info.conv_info, append_bias, conv2d_info.dilation, conv2d_info.num_groups)); - gemm_input_to_use = &im2col_reshaped_info; - } - - // Create GEMM output tensor - if(!skip_col2im) - { - TensorShape shape_gemm; - - shape_gemm = gemm_input_to_use->tensor_shape(); - shape_gemm.set(0, mat_weights_cols); - shape_gemm.set(1, conv_w * conv_h); - - info_gemm = TensorInfo(shape_gemm, 1, data_type); - info_gemm.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout()); - gemm_output_to_use = &info_gemm; - } - - GEMMLowpOutputStageInfo gemmlowp_output_stage; - gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage.gemmlowp_offset = 0; - gemmlowp_output_stage.is_quantized_per_channel = is_quantized_per_channel; - - if(is_quantized) - { - const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); - const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info; - const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1; - - gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters); - gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters); - quantization::compute_quantized_multipliers_and_shifts(src, weights, dst, - gemmlowp_output_stage.gemmlowp_multipliers.data(), - gemmlowp_output_stage.gemmlowp_shifts.data()); - gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; - gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; - - int min_activation = 0; - int max_activation = 0; - - const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, - ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - }; - - if(conv2d_info.act_info.enabled()) - { - if(supported_acts.count(conv2d_info.act_info.activation()) != 0) - { - std::tie(min_activation, max_activation) = get_quantized_activation_min_max(conv2d_info.act_info, data_type, output_quant_info); - } - else - { - fuse_activation = false; - } - } - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_min_bound = min_activation; - gemmlowp_output_stage.gemmlowp_max_bound = max_activation; - } - - // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix - const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; - - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info)); - - // Validate Col2Im - if(!skip_col2im) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClCol2ImKernel::validate(gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups)); - } - - //Validate Activation Layer - if(!fuse_activation) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, nullptr, conv2d_info.act_info)); - } - - return Status{}; -} - -void ClGemmConv2d::run(ITensorPack &tensors) -{ - prepare(tensors); - - auto src = tensors.get_const_tensor(ACL_SRC_0); - auto biases = tensors.get_const_tensor(ACL_SRC_2); - auto dst = tensors.get_tensor(ACL_DST); - auto gemm_input_to_use = src; - auto gemm_output_to_use = dst; - - CLAuxTensorHandler im2col_output(offset_int_vec(Im2ColOutput), _im2col_output, tensors, false); - CLAuxTensorHandler gemm_output(offset_int_vec(GemmOutput), _gemm_output, tensors, false); - CLAuxTensorHandler weights_reshaped(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors, false); - - // Run im2col - if(!_skip_im2col) - { - ITensorPack pack = - { - { TensorType::ACL_SRC, src }, - { TensorType::ACL_DST, im2col_output.get() } - }; - CLScheduler::get().enqueue_op(*_im2col_kernel, pack, false); - gemm_input_to_use = im2col_output.get(); - } - if(!_skip_col2im) - { - gemm_output_to_use = gemm_output.get(); - } - ITensorPack pack_mm = tensors; - pack_mm.add_const_tensor(TensorType::ACL_SRC_0, gemm_input_to_use); - pack_mm.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get()); - if(!_append_bias) - { - pack_mm.add_const_tensor(TensorType::ACL_SRC_2, biases); - } - pack_mm.add_tensor(TensorType::ACL_DST, gemm_output_to_use); - // Runs ClGemm or ClGemmLowpMatrixMultiplyCore functions - if(_is_quantized) - { - // Run gemmlowp - _mm_gemmlowp->run(pack_mm); - } - else - { - // Run gemm - _mm_gemm->run(pack_mm); - } - - // Reshape output matrix - if(!_skip_col2im) - { - ITensorPack pack = - { - { TensorType::ACL_SRC, gemm_output_to_use }, - { TensorType::ACL_DST, dst } - }; - CLScheduler::get().enqueue_op(*_col2im_kernel.get(), pack, false); - } - - //Run Activation Layer if we cannot fuse in GEMM - if(!_fuse_activation) - { - ITensorPack pack = - { - { TensorType::ACL_SRC, dst }, - { TensorType::ACL_DST, dst } - }; - CLScheduler::get().enqueue_op(*_activation_kernel.get(), pack, false); - } -} - -void ClGemmConv2d::prepare(ITensorPack &tensors) -{ - if(!_is_prepared) - { - // Run weights reshaping and mark original weights tensor as unused - ICLTensor *weights_reshaped_p = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(WeightsReshaped))); - CLAuxTensorHandler weights_reshaped(_weights_reshaped, *weights_reshaped_p); - auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); - ITensorPack pack = - { - { TensorType::ACL_SRC, weights }, - { TensorType::ACL_DST, weights_reshaped.get() } - }; - - if(_append_bias) - { - const auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2); - pack.add_const_tensor(TensorType::ACL_BIAS, biases); - } - CLScheduler::get().enqueue_op(*_weights_reshape_kernel.get(), pack, true); - tensors.add_const_tensor(TensorType::ACL_SRC_1, weights_reshaped.get()); - - // Prepare GEMM - _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors); - _is_prepared = true; - } -} -experimental::MemoryRequirements ClGemmConv2d::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClGemmConv2d.h b/src/runtime/gpu/cl/operators/ClGemmConv2d.h deleted file mode 100644 index e16d029e71..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmConv2d.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_GEMM_CONV2D_H -#define ARM_COMPUTE_CL_GEMM_CONV2D_H - -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/FunctionDescriptors.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -class ClGemm; -class ClGemmLowpMatrixMultiplyCore; -namespace kernels -{ -class ClIm2ColKernel; -class ClCol2ImKernel; -class ClWeightsReshapeKernel; -class ClActivationKernel; -} // namespace kernels - -/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions: - * - * -# @ref opencl::kernels::ClIm2ColKernel - * -# @ref ClGemm (if the data type is FP32 or FP16) - * -# @ref CLGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref ClGemmLowpOutputStage with QUANTIZE_DOWN_FIXEDPOINT type of quantization (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref opencl::kernels::ClCol2ImKernel (if NCHW data layout) - * -# @ref opencl::kernels::ClActivationKernel - */ -class ClGemmConv2d : public IClOperator -{ -public: - /** Constructor */ - ClGemmConv2d(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClGemmConv2d(const ClGemmConv2d &) = delete; - /** Default move constructor */ - ClGemmConv2d(ClGemmConv2d &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClGemmConv2d &operator=(const ClGemmConv2d &) = delete; - /** Default move assignment operator */ - ClGemmConv2d &operator=(ClGemmConv2d &&) = default; - /**Default destructor */ - ~ClGemmConv2d(); - /** Set the input and output tensors. - * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:------------------|:--------|:--------------| - * |F16 |F16 |F16 |F16 | - * |F32 |F32 |F32 |F32 | - * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | - * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. - * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv2d_info Contains convolution 2d info described in @ref Conv2dInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with CLGEMMReshapeRHSMatrixKernel. Data type supported: Same as @p input. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info = WeightsInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClGemmConvolution::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &conv2d_info, - const WeightsInfo &weights_info = WeightsInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; - experimental::MemoryRequirements workspace() const override; - -private: - /** Configures the appropriate matrix multiply routine - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or - * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. - * @param[in, out] dst Output tensor info. Data types supported: same as @p input. - * @param[in] gemmlowp_output_stage GEMMLowp output stage info - * @param[in] gemm_3d_depth Depth of GEMM 3D - * @param[in] act_info Activation to apply after the matrix multiplication - */ - void configure_mm(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, const ActivationLayerInfo &act_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines - * - * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or - * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type. - * @param[in] dst Output tensor info. Data types supported: same as @p input. - * @param[in] gemmlowp_output_stage GEMMLowp output stage info - * @param[in] gemm_3d_depth Depth of GEMM 3D - * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. - * @param[in] act_info Activation to apply after the matrix multiplication - * - * @return a status - */ - static Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info); - - enum AuxTensorIdx - { - // ClGemmLowpMatrixMultiplyCore has up to 7 internal tensors - Im2ColOutput = 8, - WeightsReshaped, - GemmOutput, - Count - }; - - std::unique_ptr<kernels::ClWeightsReshapeKernel> _weights_reshape_kernel; - std::unique_ptr<kernels::ClIm2ColKernel> _im2col_kernel; - std::unique_ptr<ClGemm> _mm_gemm; - std::unique_ptr<ClGemmLowpMatrixMultiplyCore> _mm_gemmlowp; - std::unique_ptr<opencl::kernels::ClCol2ImKernel> _col2im_kernel; - std::unique_ptr<kernels::ClActivationKernel> _activation_kernel; - - TensorInfo _im2col_output; - TensorInfo _weights_reshaped; - TensorInfo _gemm_output; - - bool _skip_im2col; - bool _skip_col2im; - bool _is_quantized; - bool _fuse_activation; - bool _append_bias; - bool _is_prepared; - - experimental::MemoryRequirements _aux_mem; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_CONV2D_H */ diff --git a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp b/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp deleted file mode 100644 index 0c72912642..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp +++ /dev/null @@ -1,786 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Log.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/gpu/cl/kernels/ClCastKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpReductionKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" - -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace opencl -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::cl_gemm; -using namespace arm_compute::opencl::kernels; -using namespace arm_compute::experimental; - -namespace -{ -inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type) -{ - switch(kernel_type) - { - case CLGEMMKernelType::NATIVE: - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - return true; - } - default: - { - return false; - } - } -} - -//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type -inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run) -{ - auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run); - if(bool(gemm_kernel)) - { - if(validate_gemm_kernel(gemm_kernel.gemm_type)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; - } - } - gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; -} - -// Validate lhs_info and rhs_info for native kernel -inline bool validate_lhs_rhs_info_native(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel - TensorInfo mm_result_s32_info{}; - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*a, *b, false, reshape_info)).set_data_type(DataType::S32)); - // Validate mm kernel - // NOTE: Ignore all other parameters (eg. output stage etc.) and only validate lhs and rhs info - // NOTE: This assumes: - // 1. lhs and rhs info's validity does not depend on these other parameters and vice versa(in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_arguments). - // 2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_and_configure_window). - if(!bool(ClGemmLowpMatrixMultiplyNativeKernel::validate(a, b, &mm_result_s32_info, lhs_info, rhs_info, reshape_info))) - { - return false; - } - return true; -} - -// Automatically select between mlgo (prioritized) and default heuristics for native kernel configs -std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_native(auto_heuristics::CommonQuery query, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_native(query); - if(config) - { - if(validate_lhs_rhs_info_native(config.lhs_info, config.rhs_info, a, b, reshape_info)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_native(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} - -// Validate lhs_info and rhs_info for reshaped only rhs kernel -inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output, - unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel - TensorInfo tmp_b_info{}; - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info))) - { - return false; - } - // Validate mm kernel - // NOTE: Ignore all other parameters (eg. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info - // NOTE: This assumes: - // 1. lhs and rhs info's validity does not depend on these other parameters and vice versa(in ClGemmLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments). - // 2. lhs and rhs info does not cause window and padding issues through side effects (in ClGemmLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window). - GEMMKernelInfo gemm_kernel_info; - gemm_kernel_info.m = m; - gemm_kernel_info.n = n; - gemm_kernel_info.k = k; - gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - gemm_kernel_info.depth_output_gemm3d = depth_output_gemm3d; - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - // Since we ignore the output stage, output data type has to be S32 to pass the validation - TensorInfo output_info_copy(*output); - output_info_copy.set_data_type(DataType::S32); - if(!bool(ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info))) - { - return false; - } - return true; -} - -// Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs -std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d, - const ITensorInfo *a, - const ITensorInfo *b, const ITensorInfo *output) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query); - if(config) - { - if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} - -inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type) -{ - switch(kernel_type) - { - case CLGEMMKernelType::NATIVE: - return false; - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - return true; - default: - ARM_COMPUTE_ERROR("Not supported gemmlowp kernel!"); - } -} -} // namespace - -ClGemmLowpMatrixMultiplyCore::ClGemmLowpMatrixMultiplyCore() - : _weights_to_qasymm8(std::make_unique<ClCastKernel>()), - _mm_native_kernel(std::make_unique<ClGemmLowpMatrixMultiplyNativeKernel>()), - _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>()), - _mtx_b_reshape_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()), - _mtx_a_reduction_kernel(std::make_unique<ClGemmLowpMatrixAReductionKernel>()), - _mtx_b_reduction_kernel(std::make_unique<ClGemmLowpMatrixBReductionKernel>()), - _offset_contribution_kernel(std::make_unique<ClGemmLowpOffsetContributionKernel>()), - _offset_contribution_output_stage_kernel(std::make_unique<ClGemmLowpOffsetContributionOutputStageKernel>()), - _aux_mem(AuxTensorIdx::Count) -{ -} - -ClGemmLowpMatrixMultiplyCore::~ClGemmLowpMatrixMultiplyCore() = default; - -void ClGemmLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_context, - ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_ERROR_THROW_ON(ClGemmLowpMatrixMultiplyCore::validate(a, b, c != nullptr ? c : nullptr, output, gemm_info)); - - _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _a_offset = a->quantization_info().uniform().offset; - _convert_to_qasymm8 = is_data_type_quantized_per_channel(b->data_type()) && is_data_type_quantized_symmetric(b->data_type()) - && a->data_type() == DataType::QASYMM8; - _b_offset = _convert_to_qasymm8 ? -128 : b->quantization_info().uniform().offset; - _gemm_info = gemm_info; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - // Set the target for the kernels - _mm_native_kernel->set_target(gpu_target); - _mm_reshaped_only_rhs_kernel->set_target(gpu_target); - - GEMMRHSMatrixInfo rhs_info; - GEMMLHSMatrixInfo lhs_info; - - // Arguments used by GEMMReshapeInfo - // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo - // in order to know how the matrices have been reshaped - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - const auto reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d); - - // Check if we need to reshape the matrix A and matrix B - _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run)); - - if(_convert_to_qasymm8) - { - // Set data type for converted weights - _qasymm8_weights = *b; - _qasymm8_weights.set_data_type(DataType::QASYMM8); - _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP); - } - - ITensorInfo *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b; - if(_is_gemm_reshaped) - { - matrix_b = &_tmp_b; - - // Pick up the GEMM configuration - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d, - depth_output_gemm3d, - a, _convert_to_qasymm8 ? &_qasymm8_weights : b, output); - - // Configure reshape RHS kernel - _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info); - } - - // Using default reduction info - const GEMMLowpReductionKernelInfo reduction_info {}; - - // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 - if(_a_offset != 0) - { - _vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32); - - // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info); - } - - // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 - if(_b_offset != 0) - { - _vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32); - - // Configure matrix A reduction kernel - _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info); - } - - GEMMKernelInfo gemm_kernel_info; - gemm_kernel_info.m = m; - gemm_kernel_info.n = n; - gemm_kernel_info.k = k; - gemm_kernel_info.depth_output_gemm3d = depth_output_gemm3d; - gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - gemm_kernel_info.a_offset = _a_offset; - gemm_kernel_info.b_offset = _b_offset; - // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage - if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE) - { - // Configure offset contribution kernel - const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1; - - _gemm_output_stage_multipliers = TensorInfo(TensorShape(num_filters), 1, DataType::S32); - _gemm_output_stage_shifts = TensorInfo(TensorShape(num_filters), 1, DataType::S32); - - GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage(); - gemmlowp_output_stage.output_data_type = a->data_type(); - if(num_filters == 1) - { - // Per-channel quantization with OFM == 1 is equivalent to uniform quantization. - // Setting this flag to false prevents the kernel from adding useless padding to the output multipliers and shifts - gemmlowp_output_stage.is_quantized_per_channel = false; - } - - gemm_kernel_info.output_stage = gemmlowp_output_stage; - - if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - // Configure and tune matrix multiply kernel with fused output stage - _mm_reshaped_only_rhs_kernel->configure(compile_context, a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c != nullptr ? c : nullptr, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); - } - else - { - _run_output_stage = true; - - if(_is_gemm_reshaped) - { - _mm_reshaped_only_rhs_kernel->configure(compile_context, a, matrix_b, &_mm_result_s32, gemm_kernel_info); - } - else - { - // Pick up the GEMM configuration - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, - a, _convert_to_qasymm8 ? &_qasymm8_weights : matrix_b, reshape_info); - - // Configure matrix multiply kernel - _mm_native_kernel->configure(compile_context, a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, reshape_info); - - _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, - c != nullptr ? c : nullptr, output, a->dimension(0), _a_offset, _b_offset, gemmlowp_output_stage, - &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts); - } - } - } - else - { - _run_offset_contribution = true; - if(_is_gemm_reshaped) - { - // Configure and tune matrix multiply kernel - _mm_reshaped_only_rhs_kernel->configure(compile_context, a, matrix_b, output, gemm_kernel_info); - } - else - { - // Pick up the GEMM configuration - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, - a, _convert_to_qasymm8 ? &_qasymm8_weights : b, reshape_info); - - // Configure matrix multiply kernel - _mm_native_kernel->configure(compile_context, a, matrix_b, output, lhs_info, rhs_info, reshape_info); - } - - // Configure offset contribution kernel - _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, - c != nullptr ? c : nullptr, a->dimension(0), _a_offset, _b_offset); - } - - // Request memory - _aux_mem[RhsQAsymm8] = MemoryInfo(offset_int_vec(RhsQAsymm8), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _qasymm8_weights.total_size()); - if(_is_gemm_reshaped) - { - // Overwrite Rhs as prepare if gemm is reshaped as there will be a two-step transformation - _aux_mem[RhsQAsymm8] = MemoryInfo(offset_int_vec(RhsQAsymm8), _reshape_b_only_on_first_run ? MemoryLifetime::Prepare : MemoryLifetime::Temporary, _qasymm8_weights.total_size()); - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); - } - if(_a_offset != 0) - { - _aux_mem[VecSumCol] = MemoryInfo(offset_int_vec(VecSumCol), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _vector_sum_col.total_size()); - } - if(_b_offset != 0) - { - _aux_mem[VecSumRow] = MemoryInfo(offset_int_vec(VecSumRow), MemoryLifetime::Temporary, _vector_sum_row.total_size()); - } - _aux_mem[ResultS32] = MemoryInfo(offset_int_vec(ResultS32), MemoryLifetime::Temporary, _mm_result_s32.total_size()); - _aux_mem[Multipliers] = MemoryInfo(offset_int_vec(Multipliers), MemoryLifetime::Persistent, _gemm_output_stage_multipliers.total_size()); - _aux_mem[Shifts] = MemoryInfo(offset_int_vec(Shifts), MemoryLifetime::Persistent, _gemm_output_stage_shifts.total_size()); -} - -Status ClGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8, DataType::QSYMM8_PER_CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8_SIGNED && b->data_type() == DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); - - int32_t a_offset = a->quantization_info().uniform().offset; - int32_t b_offset = b->quantization_info().uniform().offset; - - const ITensorInfo *matrix_a_info = a; - - TensorInfo tmp_b_info{}; - GEMMRHSMatrixInfo rhs_info; - GEMMLHSMatrixInfo lhs_info; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run())); - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d); - - bool convert_to_qasymm8 = is_data_type_quantized_per_channel(b->data_type()) && is_data_type_quantized_symmetric(b->data_type()) - && is_data_type_quantized_asymmetric(a->data_type()); - TensorInfo weights_info(*b); - if(convert_to_qasymm8) - { - b_offset = -128; - weights_info.set_data_type(DataType::QASYMM8); - ARM_COMPUTE_RETURN_ON_ERROR(ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP)); - } - const ITensorInfo *matrix_b_info = &weights_info; - if(reshape_matrix_b) - { - matrix_b_info = &tmp_b_info; - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - const auto res = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }); - lhs_info = res.lhs_info; - rhs_info = res.rhs_info; - - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info)); - } - - TensorInfo info_vector_sum_col{}; - TensorInfo info_vector_sum_row{}; - - const GEMMLowpReductionKernelInfo reduction_info; - // Validate matrix B reduction kernel only if _a_offset is not equal to 0 - if(a_offset != 0) - { - info_vector_sum_col = TensorInfo(compute_reductionA_shape(weights_info), 1, DataType::S32); - - // Configure Matrix B reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixBReductionKernel::validate(&weights_info, &info_vector_sum_col, reduction_info)); - } - - // Validate Matrix A reduction kernel only if _b_offset is not equal to 0 - if(b_offset != 0) - { - info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32); - - // Configure matrix A reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, reduction_info)); - } - - GEMMKernelInfo gemm_kernel_info; - gemm_kernel_info.m = m; - gemm_kernel_info.n = n; - gemm_kernel_info.k = k; - gemm_kernel_info.depth_output_gemm3d = depth_output_gemm3d; - gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - gemm_kernel_info.a_offset = a_offset; - gemm_kernel_info.b_offset = b_offset; - if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE) - { - const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1; - - const TensorInfo gemm_output_stage_multipliers_shifts_info(TensorInfo(TensorShape(num_filters), 1, DataType::S32)); - - GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage(); - gemmlowp_output_stage.output_data_type = a->data_type(); - - gemm_kernel_info.output_stage = gemmlowp_output_stage; - if(reshape_matrix_b && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info, - a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, - c, - &gemm_output_stage_multipliers_shifts_info, - &gemm_output_stage_multipliers_shifts_info)); - } - else - { - TensorInfo mm_result_s32_info{}; - - if(reshape_matrix_b) - { - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, reshape_info)).set_data_type(DataType::S32)); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, gemm_kernel_info)); - } - else - { - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, false, reshape_info)).set_data_type(DataType::S32)); - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }); - lhs_info = res.lhs_info; - rhs_info = res.rhs_info; - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, lhs_info, rhs_info, reshape_info)); - } - - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info, - a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, - c, - output, - a_offset, b_offset, - gemmlowp_output_stage, - &gemm_output_stage_multipliers_shifts_info, - &gemm_output_stage_multipliers_shifts_info)); - } - } - else - { - if(reshape_matrix_b) - { - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info)); - } - else - { - // Pick up the GEMM configuration - // It doesn't matter whether Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affect the shape configuration - const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }); - lhs_info = res.lhs_info; - rhs_info = res.rhs_info; - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, output, lhs_info, rhs_info, reshape_info)); - } - - if(output->total_size() != 0) - { - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpOffsetContributionKernel::validate(output, - a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, - c, - a_offset, b_offset)); - } - } - - return Status{}; -} - -void ClGemmLowpMatrixMultiplyCore::run(ITensorPack &tensors) -{ - const ITensor *a = tensors.get_const_tensor(ACL_SRC_0); - const ITensor *b = tensors.get_const_tensor(ACL_SRC_1); - const ITensor *c = tensors.get_const_tensor(ACL_SRC_2); - ITensor *dst = tensors.get_tensor(ACL_DST); - - ARM_COMPUTE_ERROR_ON_NULLPTR(a, dst); - - CLAuxTensorHandler vec_sum_col(offset_int_vec(VecSumCol), _vector_sum_col, tensors, true); - CLAuxTensorHandler vec_sum_row(offset_int_vec(VecSumRow), _vector_sum_row, tensors, true); - CLAuxTensorHandler rhs_qasymm8(offset_int_vec(RhsQAsymm8), _qasymm8_weights, tensors, true); - CLAuxTensorHandler tmp_b(offset_int_vec(RhsReshape), _tmp_b, tensors, true); - CLAuxTensorHandler res32(offset_int_vec(ResultS32), _mm_result_s32, tensors, true); - CLAuxTensorHandler shifts(offset_int_vec(Shifts), _gemm_output_stage_shifts, tensors, true); - CLAuxTensorHandler multipliers(offset_int_vec(Multipliers), _gemm_output_stage_multipliers, tensors, true); - - // Prepare the consts if needed - prepare(tensors); - - const ITensor *matrix_a = a; - const ITensor *matrix_b = _convert_to_qasymm8 ? rhs_qasymm8.get() : b; - - if(_is_gemm_reshaped) - { - matrix_b = tmp_b.get(); - if(!_reshape_b_only_on_first_run) - { - // Run reshape matrix B - ITensorPack mtx_b_reshape_pack = - { - { TensorType::ACL_SRC, _convert_to_qasymm8 ? rhs_qasymm8.get() : b }, - { TensorType::ACL_DST, tmp_b.get() } - }; - CLScheduler::get().enqueue_op(*_mtx_b_reshape_kernel, mtx_b_reshape_pack, false); - } - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if(_a_offset != 0 && !_reshape_b_only_on_first_run) - { - ITensorPack mtx_b_red_pack = - { - { TensorType::ACL_SRC, _convert_to_qasymm8 ? rhs_qasymm8.get() : b }, - { TensorType::ACL_DST, vec_sum_col.get() } - }; - CLScheduler::get().enqueue_op(*_mtx_b_reduction_kernel, mtx_b_red_pack, false); - } - - // Run matrix A reduction kernel only if _b_offset is not equal to 0 - if(_b_offset != 0) - { - ITensorPack mtx_a_red_pack = - { - { TensorType::ACL_SRC, matrix_a }, - { TensorType::ACL_DST, vec_sum_row.get() } - }; - CLScheduler::get().enqueue_op(*_mtx_a_reduction_kernel, mtx_a_red_pack, false); - } - - // Run matrix multiply - if(_is_gemm_reshaped) - { - ITensorPack gemm_reshaped_pack; - if(_run_offset_contribution) - { - gemm_reshaped_pack = ITensorPack({ { TensorType::ACL_SRC_0, matrix_a }, - { TensorType::ACL_SRC_1, matrix_b }, - { TensorType::ACL_DST, _run_output_stage ? res32.get() : dst } - }); - } - else - { - gemm_reshaped_pack = ITensorPack( - { - { TensorType::ACL_SRC, matrix_a }, - { TensorType::ACL_SRC_1, matrix_b }, - { TensorType::ACL_BIAS, c }, - { TensorType::ACL_VEC_ROW_SUM, _b_offset == 0 ? nullptr : vec_sum_row.get() }, - { TensorType::ACL_VEC_COL_SUM, _a_offset == 0 ? nullptr : vec_sum_col.get() }, - { TensorType::ACL_SHIFTS, shifts.get() }, - { TensorType::ACL_MULTIPLIERS, multipliers.get() }, - { TensorType::ACL_DST, dst }, - }); - } - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_pack, false); - } - else - { - ITensorPack gemm_native_pack = - { - { TensorType::ACL_SRC_0, matrix_a }, - { TensorType::ACL_SRC_1, matrix_b }, - { TensorType::ACL_DST, _run_offset_contribution ? dst : res32.get() } - }; - CLScheduler::get().enqueue_op(*_mm_native_kernel, gemm_native_pack, false); - } - if(_run_output_stage) - { - // Run offset contribution/output stage kernel - ITensorPack output_stage_pack = - { - { TensorType::ACL_SRC, res32.get() }, - { TensorType::ACL_BIAS, c }, - { TensorType::ACL_VEC_ROW_SUM, _b_offset == 0 ? nullptr : vec_sum_row.get() }, - { TensorType::ACL_VEC_COL_SUM, _a_offset == 0 ? nullptr : vec_sum_col.get() }, - { TensorType::ACL_SHIFTS, shifts.get() }, - { TensorType::ACL_MULTIPLIERS, multipliers.get() }, - { TensorType::ACL_DST, dst }, - }; - CLScheduler::get().enqueue_op(*_offset_contribution_output_stage_kernel, output_stage_pack, true); - } - if(_run_offset_contribution) - { - // Run offset contribution kernel - ITensorPack offset_contrib_pack = - { - { TensorType::ACL_SRC_DST, dst }, - { TensorType::ACL_BIAS, c }, - { TensorType::ACL_VEC_ROW_SUM, _b_offset == 0 ? nullptr : vec_sum_row.get() }, - { TensorType::ACL_VEC_COL_SUM, _a_offset == 0 ? nullptr : vec_sum_col.get() } - }; - CLScheduler::get().enqueue_op(*_offset_contribution_kernel, offset_contrib_pack, true); - } -} - -void ClGemmLowpMatrixMultiplyCore::prepare(ITensorPack &tensors) -{ - if(!_is_prepared) - { - auto b = tensors.get_const_tensor(TensorType::ACL_SRC_1); - CLAuxTensorHandler tmp_b(offset_int_vec(RhsReshape), _tmp_b, tensors, true); - CLAuxTensorHandler vec_sum_col(offset_int_vec(VecSumCol), _vector_sum_col, tensors, true); - CLAuxTensorHandler rhs_qasymm8(offset_int_vec(RhsQAsymm8), _qasymm8_weights, tensors, false); - - ARM_COMPUTE_ERROR_ON_NULLPTR(b); - - if(_convert_to_qasymm8) - { - ITensorPack convert_to_qs8_pack = { { ACL_SRC, b }, { ACL_DST, rhs_qasymm8.get() } }; - CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false); - b->mark_as_unused(); - } - - if(_is_gemm_reshaped && _reshape_b_only_on_first_run) - { - // Run reshape kernel and mark original weights tensor as unused - ITensorPack mtx_b_pack = - { - { TensorType::ACL_SRC, _convert_to_qasymm8 ? rhs_qasymm8.get() : b }, - { TensorType::ACL_DST, tmp_b.get() } - }; - CLScheduler::get().enqueue_op(*_mtx_b_reshape_kernel, mtx_b_pack, false); - b->mark_as_unused(); - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if(_a_offset != 0 && _reshape_b_only_on_first_run) - { - ITensorPack mtx_b_red_pack = - { - { TensorType::ACL_SRC, _convert_to_qasymm8 ? rhs_qasymm8.get() : b }, - { TensorType::ACL_DST, vec_sum_col.get() } - }; - CLScheduler::get().enqueue_op(*_mtx_b_reduction_kernel, mtx_b_red_pack, false); - } - - // Compute GEMM output multipliers and shifts for output stage - { - const size_t num_filters = (_gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? _gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1; - - CLAuxTensorHandler multipliers(offset_int_vec(Multipliers), _gemm_output_stage_multipliers, tensors, false); - CLAuxTensorHandler shifts(offset_int_vec(Shifts), _gemm_output_stage_shifts, tensors, false); - - ICLTensor *multiplier_tensor = multipliers.get(); - if(multiplier_tensor != nullptr && multiplier_tensor->info()->total_size() > 0) - { - multiplier_tensor->map(CLScheduler::get().queue(), true); - std::memcpy(multiplier_tensor->ptr_to_element(Coordinates(0)), _gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.data(), num_filters * sizeof(int32_t)); - multiplier_tensor->unmap(CLScheduler::get().queue()); - } - - ICLTensor *shifts_tensor = shifts.get(); - if(shifts.get() != nullptr && shifts_tensor->info()->total_size() > 0) - { - shifts_tensor->map(CLScheduler::get().queue(), true); - std::memcpy(shifts_tensor->ptr_to_element(Coordinates(0)), _gemm_info.gemmlowp_output_stage().gemmlowp_shifts.data(), num_filters * sizeof(int32_t)); - shifts_tensor->unmap(CLScheduler::get().queue()); - } - } - CLScheduler::get().queue().finish(); - _is_prepared = true; - } -} - -experimental::MemoryRequirements ClGemmLowpMatrixMultiplyCore::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h b/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h deleted file mode 100644 index 36a4257b86..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_GEMMLOWP_MATRIXMULTIPLY_CORE_H -#define ARM_COMPUTE_CL_GEMMLOWP_MATRIXMULTIPLY_CORE_H - -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/CL/CLTypes.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -namespace kernels -{ -// Forward declarations -class ClCastKernel; -class ClGemmLowpMatrixMultiplyNativeKernel; -class ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel; -class ClGemmReshapeRhsMatrixKernel; -class ClGemmLowpMatrixAReductionKernel; -class ClGemmLowpMatrixBReductionKernel; -class ClGemmLowpOffsetContributionKernel; -class ClGemmLowpOffsetContributionOutputStageKernel; -} // namespace kernels - -/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */ -class ClGemmLowpMatrixMultiplyCore : public IClOperator -{ -public: - ClGemmLowpMatrixMultiplyCore(); - ~ClGemmLowpMatrixMultiplyCore(); - /** Initialise the kernel's inputs, output - * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:------------------|:--------|:--------------| - * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | - * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | - * |QASYMM8 |QSYMM8 |S32 |QASYMM8 | - * |QASYMM8 |QASYMM8 |S32 |S32 | - * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 | - * |QASYMM8 |QSYMM8 |S32 |S32 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 | - * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 | - * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 | - * - * @note GEMMLowp: low precision GEMM kernel. [A * B + C] - * This kernel performs the following computations: - * - * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them. - * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them. - * -# Compute the matrix product of the resulting a * b in int32. - * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE - * - * @param[in] compile_context The compile context to be used. - * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 - * @param[out] output Output tensor. Data type supported: S32 or QASYMM8/QASYMM8_SIGNED if gemm_info.gemmlowp_output_stage != NONE - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should be executed only for the first run - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClGemmLowpMatrixMultiplyCore::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; - experimental::MemoryRequirements workspace() const override; - -private: - enum AuxTensorIdx - { - ResultS32 = 0, - RhsQAsymm8, - RhsReshape, - VecSumCol, - VecSumRow, - Multipliers, - Shifts, - Count - }; - -private: - // Kernels used - std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8; - std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel; - std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel; - std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel; - std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel; - std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel; - - // Temporary tensors - TensorInfo _qasymm8_weights{}; - TensorInfo _vector_sum_col{}; - TensorInfo _vector_sum_row{}; - TensorInfo _tmp_b{}; - TensorInfo _mm_result_s32{}; - TensorInfo _gemm_output_stage_multipliers{}; - TensorInfo _gemm_output_stage_shifts{}; - - int32_t _a_offset{ 0 }; - int32_t _b_offset{ 0 }; - bool _is_gemm_reshaped{ true }; - bool _reshape_b_only_on_first_run{ false }; - bool _run_output_stage{ false }; - bool _convert_to_qasymm8{ false }; - bool _run_offset_contribution{ false }; - bool _is_prepared{ false }; - GEMMInfo _gemm_info{}; - - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMMLOWP_MATRIXMULTIPLY_CORE_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.cpp b/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.cpp deleted file mode 100644 index 3477583c76..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClGemmLowpOutputStage::configure(const CLCompileContext &compile_context, const ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, const GEMMLowpOutputStageInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - - switch(info.type) - { - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - { - auto k = std::make_unique<opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel>(); - k->configure(compile_context, src, bias, dst, &info); - _kernel = std::move(k); - break; - } - case GEMMLowpOutputStageType::QUANTIZE_DOWN: - { - auto k = std::make_unique<opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleKernel>(); - k->configure(compile_context, src, bias, dst, &info); - _kernel = std::move(k); - break; - } - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: - { - auto k = std::make_unique<opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFloatKernel>(); - k->configure(compile_context, src, bias, dst, &info); - _kernel = std::move(k); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported GEMMLowpOutputStage type."); - } -} - -Status ClGemmLowpOutputStage::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16); - - switch(info.type) - { - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - return opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel::validate(src, bias, dst, &info); - case GEMMLowpOutputStageType::QUANTIZE_DOWN: - return opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleKernel::validate(src, bias, dst, &info); - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: - return opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFloatKernel::validate(src, bias, dst, &info); - default: - return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported GEMMLowpOutputStage type."); - } -} - -void ClGemmLowpOutputStage::run(ITensorPack &tensors) -{ - const ITensor *src = tensors.get_const_tensor(ACL_SRC); - const ITensor *bias = tensors.get_const_tensor(ACL_BIAS); - ITensor *dst = tensors.get_tensor(ACL_DST); - - ITensorPack pack{ { ACL_SRC, src }, { ACL_BIAS, bias }, { ACL_DST, dst } }; - CLScheduler::get().enqueue_op(*_kernel, pack, true); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.h b/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.h deleted file mode 100644 index 33b82fcafa..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemmLowpOutputStage.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_GEMMLOWP_OUTPUT_STAGE_H -#define ARM_COMPUTE_CL_GEMMLOWP_OUTPUT_STAGE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -/** This file contains all available output stages for GEMMLowp on OpenCL. - * - * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore), - * and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. - * - * More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md - */ - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to execute GEMMLowpQuantizeDown kernels on CL. - * - * This function calls the following CL kernels: - * - * -# @ref opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleKernel - * -# @ref opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFloatKernel - * -# @ref opencl::kernels::ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel -*/ -class ClGemmLowpOutputStage : public IClOperator -{ -public: - /** Constructor */ - ClGemmLowpOutputStage() = default; - /** Initialise the kernel's inputs, output - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src0 |src1 |dst | - * |:--------------|:-------------|:-------------| - * |S32 |S32 |QASYMM8 | - * |S32 |S32 |QASYMM8_SIGNED| - * |S32 |S32 |QSYMM16 | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p src. - * @param[out] dst Destination tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info GEMMLowp output stage metadata. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, const GEMMLowpOutputStageInfo &info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClGemmLowpOutputStage::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &info); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMMLOWP_OUTPUT_STAGE_H */ diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp b/src/runtime/gpu/cl/operators/ClLogicalNot.cpp deleted file mode 100644 index 400efe450d..0000000000 --- a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClLogicalNot.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClLogicalNot::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::LOGICAL_NOT); - _kernel = std::move(k); -} - -Status ClLogicalNot::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOGICAL_NOT); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.h b/src/runtime/gpu/cl/operators/ClLogicalNot.h deleted file mode 100644 index 782ac0848f..0000000000 --- a/src/runtime/gpu/cl/operators/ClLogicalNot.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_LOGICAL_NOT_H -#define ARM_COMPUTE_CL_LOGICAL_NOT_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClElementWiseUnaryKernel for NOT operation */ -class ClLogicalNot : public IClOperator -{ -public: - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: U8. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClLogicalNot::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_LOGICAL_NOT_H */ diff --git a/src/runtime/gpu/cl/operators/ClMul.cpp b/src/runtime/gpu/cl/operators/ClMul.cpp deleted file mode 100644 index d1e2bc806f..0000000000 --- a/src/runtime/gpu/cl/operators/ClMul.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClMul.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClMulKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClMulKernel>(); - k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); - _kernel = std::move(k); -} - -Status ClMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClMulKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); -} - -void ClComplexMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClComplexMulKernel>(); - k->configure(compile_context, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClComplexMulKernel::validate(src1, src2, dst, act_info); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClMul.h b/src/runtime/gpu/cl/operators/ClMul.h deleted file mode 100644 index 29d5885a1c..0000000000 --- a/src/runtime/gpu/cl/operators/ClMul.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_MUL_H -#define ARM_COMPUTE_CL_MUL_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref opencl::kernels::ClMulKernel */ -class ClMul : public IClOperator -{ -public: - /** Initialise the kernel's sources, dst and convertion policy. - * - * Valid configurations (src1,src2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,U8) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - (QSYMM16,QSYMM16) -> S32 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst The dst tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClMul::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClComplexMulKernel */ -class ClComplexMul : public IClOperator -{ -public: - /** Initialise the kernel's sources, dst. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 An src tensor info. Data types supported: F16/F32. Number of channels supported: 2. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 An src tensor info. Data types supported: same as @p src1. Number of channels supported: same as @p src1. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst The dst tensor info, Data types supported: same as @p src1. Number of channels supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClComplexMul::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_MUL_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPRelu.cpp b/src/runtime/gpu/cl/operators/ClPRelu.cpp deleted file mode 100644 index d1ce14cc87..0000000000 --- a/src/runtime/gpu/cl/operators/ClPRelu.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClPRelu.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -using KernelType = kernels::ClArithmeticKernel; -void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output) -{ - auto k = std::make_unique<KernelType>(); - k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output)); - _kernel = std::move(k); -} - -Status ClPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) -{ - return KernelType::validate(ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output)); -} - -void ClPRelu::run(ITensorPack &tensors) -{ - // Output tensor can be given as nullptr for in-place computation. - // In this case, get the input tensor and use it as the output tensor. - if(tensors.get_tensor(TensorType::ACL_DST) == nullptr) - { - auto src_tensor = const_cast<ITensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); - ARM_COMPUTE_ERROR_ON_MSG(src_tensor == nullptr, "invalid source tensor is given for in-place computation"); - tensors.add_tensor(TensorType::ACL_DST, src_tensor); - } - IClOperator::run(tensors); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPRelu.h b/src/runtime/gpu/cl/operators/ClPRelu.h deleted file mode 100644 index 3a02030635..0000000000 --- a/src/runtime/gpu/cl/operators/ClPRelu.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_PRELU_H -#define ARM_COMPUTE_CL_PRELU_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic operator to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU - * - * @note The operator implements an activation layer with the PRELU activation function. - */ -class ClPRelu : public IClOperator -{ -public: - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] alpha PRelu layer parameters. Data types supported: same of @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClPRelu::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_PRELU_H */ diff --git a/src/runtime/gpu/cl/operators/ClPermute.cpp b/src/runtime/gpu/cl/operators/ClPermute.cpp deleted file mode 100644 index 719bb6dac6..0000000000 --- a/src/runtime/gpu/cl/operators/ClPermute.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClPermute.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClPermuteKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClPermute::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm) -{ - auto k = std::make_unique<kernels::ClPermuteKernel>(); - k->configure(compile_context, src, dst, perm); - _kernel = std::move(k); -} - -Status ClPermute::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm) -{ - return kernels::ClPermuteKernel::validate(src, dst, perm); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPermute.h b/src/runtime/gpu/cl/operators/ClPermute.h deleted file mode 100644 index 867aba010d..0000000000 --- a/src/runtime/gpu/cl/operators/ClPermute.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_PERMUTE_H -#define ARM_COMPUTE_CL_PERMUTE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClPermuteKernel */ -class ClPermute : public IClOperator -{ -public: - /** Initialise the kernel's inputs and outputs and permute vector - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. - * @param[in] dst The dst tensor info. Data types supported: Same as @p src - * @param[in] perm Permutation vector - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClPermute::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_PERMUTE_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPool2d.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp deleted file mode 100644 index 40c2b0a8ba..0000000000 --- a/src/runtime/gpu/cl/operators/ClPool2d.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClPool2d.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClPool2dKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - // Configure pooling kernel - auto k = std::make_unique<kernels::ClPool2dKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, dst, info, indices); - _pooling = std::move(k); - - const DataType data_type = src->data_type(); - - // Configure border depending on operation required (quantize border in case of asymmetric data_type) - BorderMode border_mode{}; - PixelValue pixel_value(0.f); - if(is_data_type_quantized_asymmetric(data_type) && !info.exclude_padding) - { - pixel_value = PixelValue(0, data_type, src->quantization_info()); - } - - // Data layout - const auto data_layout = info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : info.data_layout; - - switch(data_layout) - { - case DataLayout::NCHW: - border_mode = (PoolingType::MAX == info.pool_type) ? BorderMode::REPLICATE : BorderMode::CONSTANT; - break; - case DataLayout::NHWC: - border_mode = BorderMode::CONSTANT; - if(PoolingType::MAX == info.pool_type) - { - if(is_data_type_quantized(data_type)) - { - std::tie(pixel_value, std::ignore) = get_min_max(data_type); - } - else - { - pixel_value = PixelValue(std::numeric_limits<float>::lowest()); - } - } - break; - default: - ARM_COMPUTE_ERROR("Data layout not supported"); - } - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, src, _pooling->border_size(), border_mode, pixel_value); - _border_handler = std::move(b); - - // Tune kernels - CLScheduler::get().tune_kernel_static(*_pooling); -} - -Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices) -{ - return kernels::ClPool2dKernel::validate(src, dst, info, indices); -} - -void ClPool2d::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - - CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false); - CLScheduler::get().enqueue_op(*_pooling.get(), tensors, false); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClPool2d.h b/src/runtime/gpu/cl/operators/ClPool2d.h deleted file mode 100644 index 8ac386a64b..0000000000 --- a/src/runtime/gpu/cl/operators/ClPool2d.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_POOL2D_H -#define ARM_COMPUTE_CL_POOL2D_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: - * - * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClPool2d - */ -class ClPool2d : public IClOperator -{ -public: - /** Constructor */ - ClPool2d() = default; - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] dst Destination tensor info. Data type supported: same as @p src - * @param[in] info Pooling layer parameters. - * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClPool2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices = nullptr); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<ICLKernel> _pooling{ nullptr }; - std::unique_ptr<ICLKernel> _border_handler{ nullptr }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_POOL2D_H */ diff --git a/src/runtime/gpu/cl/operators/ClQuantize.cpp b/src/runtime/gpu/cl/operators/ClQuantize.cpp deleted file mode 100644 index 92bbb62ba5..0000000000 --- a/src/runtime/gpu/cl/operators/ClQuantize.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClQuantize.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClQuantizeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClQuantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClQuantizeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClQuantize::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClQuantizeKernel::validate(src, dst); -} - -void ClQuantize::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClQuantize.h b/src/runtime/gpu/cl/operators/ClQuantize.h deleted file mode 100644 index b15d389cca..0000000000 --- a/src/runtime/gpu/cl/operators/ClQuantize.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_QUANTIZE_H -#define ARM_COMPUTE_CL_QUANTIZE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClQuantizeKernel that dequantizes an input tensor */ -class ClQuantize : public IClOperator -{ -public: - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32. - * @param[out] dst Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this function - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClQuantize::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); - - // Inherited method overridden - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_QUANTIZE_H */ diff --git a/src/runtime/gpu/cl/operators/ClReshape.cpp b/src/runtime/gpu/cl/operators/ClReshape.cpp deleted file mode 100644 index d3fa9f10ab..0000000000 --- a/src/runtime/gpu/cl/operators/ClReshape.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClReshape.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClReshapeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClReshape::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClReshapeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClReshape::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClReshapeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClReshape.h b/src/runtime/gpu/cl/operators/ClReshape.h deleted file mode 100644 index b3d9267be4..0000000000 --- a/src/runtime/gpu/cl/operators/ClReshape.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_RESHAPE_H -#define ARM_COMPUTE_CL_RESHAPE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClReshapeKernel */ -class ClReshape : public IClOperator -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor info. Data type supported: All - * @param[out] output Output info. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClReshape::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_RESHAPE_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClScale.cpp b/src/runtime/gpu/cl/operators/ClScale.cpp deleted file mode 100644 index 5c8d754c7e..0000000000 --- a/src/runtime/gpu/cl/operators/ClScale.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClScale.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClScaleKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClScale::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - // Configure Scale kernel - auto k = std::make_unique<kernels::ClScaleKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, dst, info); - _kernel = std::move(k); - - // Tune kernel - CLScheduler::get().tune_kernel_static(*_kernel); -} - -Status ClScale::validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info) -{ - return kernels::ClScaleKernel::validate(src, dst, info); -} - -void ClScale::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClScale.h b/src/runtime/gpu/cl/operators/ClScale.h deleted file mode 100644 index 0ff78640f7..0000000000 --- a/src/runtime/gpu/cl/operators/ClScale.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_SCALE_H -#define ARM_COMPUTE_CL_SCALE_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a scale layer. This function calls the following OpenCL kernels: - * - * -# @ref kernels::ClScaleKernel - */ -class ClScale : public IClOperator -{ -public: - /** Constructor */ - ClScale() = default; - /** Initialize the function's source, destination, interpolation type and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] src Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[out] dst Destination tensor info. Data types supported: Same as @p src - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo descriptor to be used to configure - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClScale::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info); - - // Inherited method overridden - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSCALE_H */ diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.cpp b/src/runtime/gpu/cl/operators/ClSoftmax.cpp deleted file mode 100644 index 975bb0b932..0000000000 --- a/src/runtime/gpu/cl/operators/ClSoftmax.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClSoftmax.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/gpu/cl/kernels/ClSoftmaxKernel.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/core/helpers/SoftmaxHelpers.h" -#include "src/runtime/gpu/cl/operators/ClPermute.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" -#include "support/Cast.h" - -using namespace arm_compute::experimental; - -namespace arm_compute -{ -namespace opencl -{ -ClSoftmax::ClSoftmax() - : _permute_input(std::make_unique<ClPermute>()), - _permute_output(std::make_unique<ClPermute>()), - _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()), - _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()), - _max_info(), - _sum_info(), - _tmp_info(), - _permuted_src_info(), - _permuted_dst_info(), - _aux_mem(InternalTensorIdx::COUNT) -{ -} - -void ClSoftmax::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info)); - - const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions()))); - - _needs_permute = actual_axis != 0; - - const ITensorInfo &tmp_input_info = _needs_permute ? _permuted_src_info : src; - ITensorInfo &tmp_output_info = _needs_permute ? _permuted_dst_info : dst; - - if(_needs_permute) - { - const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info); - } - - DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? DataType::S32 : tmp_input_info.data_type(); - _tmp_info = tmp_input_info.clone()->set_data_type(tmp_data_type); - - TensorShape max_sum_shape = tmp_input_info.tensor_shape(); - _max_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape); - _sum_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type); - - // Set GPU target to kernels - _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target()); - - _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info); - _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info); - - if(_needs_permute) - { - const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info); - } - - _aux_mem[InternalTensorIdx::SUM] = MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size()); - _aux_mem[InternalTensorIdx::TMP] = MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size()); - _aux_mem[InternalTensorIdx::MAX] = MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size()); - - _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), MemoryLifetime::Temporary, _permuted_src_info.total_size()); - _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST), MemoryLifetime::Temporary, _permuted_dst_info.total_size()); -} - -Status ClSoftmax::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported"); - ARM_COMPUTE_UNUSED(info.beta); - ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || static_cast<int32_t>(src.num_dimensions()) <= info.axis); - - const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions()))); - const bool needs_permute = actual_axis != 0; - if(needs_permute) - { - const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector); - TensorInfo input_permuted(src.clone()->set_tensor_shape(permuted_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector)); - TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector)); - } - - // Create intermediate tensor info - DataType tmp_data_type = is_data_type_quantized_asymmetric(src.data_type()) ? DataType::S32 : src.data_type(); - TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true)); - - TensorShape max_sum_shape = src.tensor_shape(); - max_sum_shape.set(0, 1); - TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true)); - TensorInfo tensor_info_sum(src.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true)); - - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum)); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info)); - - return Status{}; -} - -void ClSoftmax::run(ITensorPack &tensors) -{ - auto src = tensors.get_const_tensor(TensorType::ACL_SRC); - auto dst = tensors.get_tensor(TensorType::ACL_DST); - - CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false); - CLAuxTensorHandler tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false); - CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false); - - CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors, false); - CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors, false); - - if(_needs_permute) - { - ITensorPack pack; - pack.add_const_tensor(TensorType::ACL_SRC, src); - pack.add_tensor(TensorType::ACL_DST, permuted_src.get()); - _permute_input.get()->run(pack); - } - - ITensorPack sum_pack; - ITensorPack norm_pack; - if(_needs_permute) - { - sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get()); - norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get()); - } - else - { - sum_pack.add_const_tensor(TensorType::ACL_SRC, src); - norm_pack.add_tensor(TensorType::ACL_DST, dst); - } - sum_pack.add_tensor(TensorType::ACL_DST, tmp.get()); - sum_pack.add_tensor(TensorType::ACL_INT_0, max.get()); - sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get()); - - norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get()); - norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get()); - - CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false); - CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false); - - if(_needs_permute) - { - ITensorPack pack; - pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get()); - pack.add_tensor(TensorType::ACL_DST, dst); - _permute_output.get()->run(pack); - } -} - -experimental::MemoryRequirements ClSoftmax::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.h b/src/runtime/gpu/cl/operators/ClSoftmax.h deleted file mode 100644 index c85b193d9d..0000000000 --- a/src/runtime/gpu/cl/operators/ClSoftmax.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_SOFTMAX_H -#define ARM_COMPUTE_CL_SOFTMAX_H - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -struct SoftmaxKernelInfo; - -namespace opencl -{ -class ClPermute; -namespace kernels -{ -class ClLogits1DMaxShiftExpSumKernel; -class ClLogits1DNormKernel; -} // namespace kernels -class ClSoftmax : public IClOperator -{ -public: - /** Constructor */ - ClSoftmax(); - /** Configure the operator - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax - * @param[out] dst Destination tensor info. Data types supported: same as @p src - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClSoftmax::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - enum InternalTensorIdx - { - MAX = 0, - SUM, - TMP, - PERMUTED_SRC, - PERMUTED_DST, - COUNT - }; - - std::unique_ptr<ClPermute> _permute_input; - std::unique_ptr<ClPermute> _permute_output; - std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel; - std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel; - bool _needs_permute{ false }; - - TensorInfo _max_info; - TensorInfo _sum_info; - TensorInfo _tmp_info; - TensorInfo _permuted_src_info; - TensorInfo _permuted_dst_info; - - experimental::MemoryRequirements _aux_mem{}; -}; - -} // opencl -} // arm_compute -#endif /* ARM_COMPUTE_CL_SOFTMAX_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClSub.cpp b/src/runtime/gpu/cl/operators/ClSub.cpp deleted file mode 100644 index 429f23a837..0000000000 --- a/src/runtime/gpu/cl/operators/ClSub.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClSub.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClSub::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::SUB, src1, src2, dst, policy, act_info); - _kernel = std::move(k); -} - -Status ClSub::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::SUB, src1, src2, dst, policy, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClSub.h b/src/runtime/gpu/cl/operators/ClSub.h deleted file mode 100644 index 2dac11c00e..0000000000 --- a/src/runtime/gpu/cl/operators/ClSub.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_SUB_H -#define ARM_COMPUTE_CL_SUB_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run arithmetic subtraction - * - * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @note The function performs an arithmetic subtraction between two tensors. - */ -class ClSub : public IClOperator -{ -public: - /** Configure function for a given list of arguments. - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClSub::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_SUB_H */ diff --git a/src/runtime/gpu/cl/operators/ClTranspose.cpp b/src/runtime/gpu/cl/operators/ClTranspose.cpp deleted file mode 100644 index 48f44282e8..0000000000 --- a/src/runtime/gpu/cl/operators/ClTranspose.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClTranspose.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClTransposeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClTranspose::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClTransposeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClTranspose::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClTransposeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClTranspose.h b/src/runtime/gpu/cl/operators/ClTranspose.h deleted file mode 100644 index dcd80820bb..0000000000 --- a/src/runtime/gpu/cl/operators/ClTranspose.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_TRANSPOSE_H -#define ARM_COMPUTE_CL_TRANSPOSE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClTransposeKernel */ -class ClTranspose : public IClOperator -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. - * @param[in] dst The dst tensor info. Data types supported: Same as @p src - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClTranspose::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_TRANSPOSE_H */ diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp deleted file mode 100644 index 07f90ddaef..0000000000 --- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" -#include "support/Cast.h" - -using namespace arm_compute::experimental; - -namespace arm_compute -{ -namespace opencl -{ -namespace -{ -Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataLayout data_layout) -{ - Size2D output_tile = Size2D{}; - - const unsigned int kernel_max_dim = std::max(kernel_dims.width, kernel_dims.height); - - // Check if the input spatial dimensions are smaller than 4 - const bool is_input_lt4_nchw = (input_dims.width <= 4 && input_dims.height <= 4) && (data_layout == DataLayout::NCHW); - - if(kernel_max_dim == 3U) - { - if(kernel_dims == Size2D(3U, 3U)) - { - output_tile = is_input_lt4_nchw ? Size2D(2U, 2U) : Size2D(4U, 4U); - } - else if(kernel_dims == Size2D(3U, 1U)) - { - output_tile = is_input_lt4_nchw ? Size2D(2U, 1U) : Size2D(4U, 1U); - } - else - { - output_tile = is_input_lt4_nchw ? Size2D(1U, 2U) : Size2D(1U, 4U); - } - } - else if(kernel_max_dim == 5U) - { - output_tile = Size2D(kernel_dims.width == 1 ? 1U : 4U, - kernel_dims.height == 1 ? 1U : 4U); - } - else if(kernel_max_dim == 7U) - { - output_tile = Size2D(kernel_dims.width == 1 ? 1U : 2U, - kernel_dims.height == 1 ? 1U : 2U); - } - - return output_tile; -} - -bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size) -{ - // Check if we want to configure a Winograd configuration which requires fast math - using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>; - - std::vector<WinogradConfiguration> fast_math_winograd = - { - WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)), - WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)) - }; - - auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height), - std::pair<int, int>(kernel_size.width, kernel_size.height)); - - return std::find(fast_math_winograd.begin(), fast_math_winograd.end(), p) != fast_math_winograd.end(); -} - -Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - // Get indeces for the width and height - const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); - - // Input shape, kernel size and output tile - const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]); - const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]); - const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_left() > (kernel_size.x() / 2u)) || (conv_info.pad_right() > (kernel_size.x() / 2u))), "Winograd only supports padding up to half kernel size"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_top() > (kernel_size.y() / 2u)) || (conv_info.pad_bottom() > (kernel_size.y() / 2u))), "Winograd only supports padding up to half kernel size"); - - // Check if the Winograd configuration requires fast math - if(!enable_fast_math) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false. - ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true"); - } - - const WinogradInfo winograd_info = WinogradInfo(output_tile, - kernel_size, - input_dims, - conv_info, - src->data_layout()); - - // Validate input transform - const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info); - const TensorInfo input0 = src->clone()->set_tensor_shape(input0_shape); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradInputTransformKernel::validate(src, &input0, winograd_info)); - - // Validate filter transform - const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info); - const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradFilterTransformKernel::validate(weights, &input1, winograd_info)); - - // Validate batched matrix multiply - TensorShape batched_mm_output_shape = input0.tensor_shape(); - batched_mm_output_shape[0] = input1.tensor_shape()[0]; - const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false, - GEMMLowpOutputStageInfo(), (src->data_type() == DataType::F16)))); - - // Configure output transform - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradOutputTransformKernel::validate(&batched_mm_output, biases, dst, winograd_info, act_info)); - return Status{}; -} - -} // namespace - -ClWinogradConv2d::ClWinogradConv2d() - : _batched_mm(), - _input_transform(std::make_unique<kernels::ClWinogradInputTransformKernel>()), - _filter_transform(std::make_unique<kernels::ClWinogradFilterTransformKernel>()), - _output_transform(std::make_unique<kernels::ClWinogradOutputTransformKernel>()), - _border_handler(), - _input0(), - _input1(), - _batched_mm_output(), - _is_prepared(false), - _aux_mem() -{ -} - -ClWinogradConv2d::~ClWinogradConv2d() = default; - -void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math)); - // Get indices for the width and height - const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); - - // Input shape, kernel size and output tile - const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]); - const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]); - const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout()); - - // Check if the Winograd configuration requires fast math - if(!enable_fast_math) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false. - ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true"); - } - const WinogradInfo winograd_info = WinogradInfo(output_tile, - kernel_size, - input_dims, - conv_info, - src->data_layout()); - - _is_prepared = false; - - // Configure input transform - _input_transform->configure(compile_context, src, &_input0, winograd_info); - _border_handler.configure(compile_context, src, _input_transform->border_size(), BorderMode::CONSTANT, PixelValue()); - - // Configure filter transform - _filter_transform->configure(compile_context, weights, &_input1, winograd_info); - - // Configure batched matrix multiply - _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, - false, false, - GEMMLowpOutputStageInfo(), - (src->data_type() == DataType::F16))); - - // Configure output transform - _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info); - - _aux_mem = _batched_mm.workspace(); - const MemoryLifetime wino_wei_lifetm = std::any_of(std::begin(_aux_mem), std::end(_aux_mem), [](const auto & r) - { - return (r.lifetime == MemoryLifetime::Persistent) && (r.size > 0); - }) ? - MemoryLifetime::Prepare : - MemoryLifetime::Persistent; - _aux_mem.push_back(MemoryInfo(offset_int_vec(2), MemoryLifetime::Temporary, _input0.total_size())); - _aux_mem.push_back(MemoryInfo(offset_int_vec(3), wino_wei_lifetm, _input1.total_size())); - _aux_mem.push_back(MemoryInfo(offset_int_vec(4), MemoryLifetime::Temporary, _batched_mm_output.total_size())); -} - -Status ClWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math)); - return Status{}; -} - -void ClWinogradConv2d::run(ITensorPack &tensors) -{ - const bool is_gemm_reshaped = _aux_mem[3].lifetime == MemoryLifetime::Prepare; - - auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); - auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); - - CLAuxTensorHandler input0(offset_int_vec(2), _input0, tensors, true); - CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true, is_gemm_reshaped); - CLAuxTensorHandler batched_mm_output(offset_int_vec(4), _batched_mm_output, tensors, true); - - prepare(tensors); - - // Run input transform - ITensorPack pack_it - { - { TensorType::ACL_SRC, src }, - { TensorType::ACL_DST, input0.get() }, - }; - CLScheduler::get().enqueue_op(_border_handler, pack_it, false); - CLScheduler::get().enqueue_op(*_input_transform, pack_it, false); - - // Run batched matrix multiplication - ITensorPack pack_mm = tensors; - pack_mm.add_const_tensor(TensorType::ACL_SRC_0, input0.get()); - pack_mm.add_tensor(TensorType::ACL_DST, batched_mm_output.get()); - is_gemm_reshaped ? pack_mm.remove_tensor(TensorType::ACL_SRC_1) : pack_mm.add_const_tensor(TensorType::ACL_SRC_1, input1.get()); - _batched_mm.run(pack_mm); - - // Run output transform - ITensorPack pack_ot - { - { TensorType::ACL_SRC_0, batched_mm_output.get() }, - { TensorType::ACL_SRC_1, biases }, - { TensorType::ACL_DST, dst }, - }; - CLScheduler::get().enqueue_op(*_output_transform, pack_ot); -} - -void ClWinogradConv2d::prepare(ITensorPack &tensors) -{ - if(!_is_prepared) - { - auto weights = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); - ICLTensor *in1_aux = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(3))); - - CLAuxTensorHandler input1(_input1, *in1_aux); - ITensorPack pack_ft - { - { TensorType::ACL_SRC, weights }, - { TensorType::ACL_DST, input1.get() }, - }; - // Run filter transform and mark original weights as unused - CLScheduler::get().enqueue_op(*_filter_transform, pack_ft, false); - weights->mark_as_unused(); - - // Prepare GEMM and release reshaped weights if marked unused by ClGemm - ITensorPack mm_prepare_pack = tensors; - mm_prepare_pack.add_tensor(ACL_SRC_1, input1.get()); - _batched_mm.prepare(mm_prepare_pack); - - CLScheduler::get().queue().finish(); - _is_prepared = true; - } -} - -experimental::MemoryRequirements ClWinogradConv2d::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h b/src/runtime/gpu/cl/operators/ClWinogradConv2d.h deleted file mode 100644 index 83b31f1c99..0000000000 --- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_WINOGRADCONV2D_H -#define ARM_COMPUTE_CL_WINOGRADCONV2D_H - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" -#include "src/runtime/gpu/cl/operators/ClGemm.h" - -namespace arm_compute -{ -class CLCompileContext; -class ITensorInfo; -namespace opencl -{ -namespace kernels -{ -class ClWinogradInputTransformKernel; -class ClWinogradFilterTransformKernel; -class ClWinogradOutputTransformKernel; -} // kernels -/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels: - * - * -# @ref kernels::ClWinogradInputTransformKernel - * -# @ref kernels::ClWinogradFilterTransformKernel (only once) - * -# @ref ClGemm - * -# @ref kernels::ClWinogradOutputTransformKernel - * - */ -class ClWinogradConv2d : public IClOperator -{ -public: - /** Default constructor */ - ClWinogradConv2d(); - /** Default destructor */ - ~ClWinogradConv2d(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClWinogradConv2d(const ClWinogradConv2d &) = delete; - /** Default move constructor */ - ClWinogradConv2d(ClWinogradConv2d &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClWinogradConv2d &operator=(const ClWinogradConv2d &) = delete; - /** Default move assignment operator */ - ClWinogradConv2d &operator=(ClWinogradConv2d &&) = default; - /** Set the input and output tensors. - * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:--------------|:------|:--------------| - * |F16 |F16 |F16 |F16 | - * |F32 |F32 |F32 |F32 | - * - * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout - * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p src - * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. Default is false - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClWinogradConv2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - ClGemm _batched_mm; - std::unique_ptr<kernels::ClWinogradInputTransformKernel> _input_transform; - std::unique_ptr<kernels::ClWinogradFilterTransformKernel> _filter_transform; - std::unique_ptr<kernels::ClWinogradOutputTransformKernel> _output_transform; - CLFillBorderKernel _border_handler; - TensorInfo _input0; - TensorInfo _input1; - TensorInfo _batched_mm_output; - bool _is_prepared; - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_WINOGRADCONV2D_H */ diff --git a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h deleted file mode 100644 index af383489a1..0000000000 --- a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H -#define ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H - -#include "arm_compute/core/ITensorPack.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/CL/CLTensor.h" - -#include "src/common/utils/Log.h" -#include "support/Cast.h" - -namespace arm_compute -{ -namespace opencl -{ -/* Tensor handler to wrap and handle tensor allocations on workspace buffers */ -class CLAuxTensorHandler -{ -public: - CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false, bool bypass_alloc = false) - : _tensor() - { - if(info.total_size() == 0) - { - return; - } - _tensor.allocator()->soft_init(info); - - ICLTensor *packed_tensor = utils::cast::polymorphic_downcast<ICLTensor *>(pack.get_tensor(slot_id)); - if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size())) - { - if(!bypass_alloc) - { - _tensor.allocator()->allocate(); - ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Allocating auxiliary tensor"); - } - - if(pack_inject) - { - pack.add_tensor(slot_id, &_tensor); - _injected_tensor_pack = &pack; - _injected_slot_id = slot_id; - } - } - else - { - _tensor.allocator()->import_memory(packed_tensor->cl_buffer()); - } - } - - CLAuxTensorHandler(TensorInfo &info, ICLTensor &tensor) - : _tensor() - { - _tensor.allocator()->soft_init(info); - if(info.total_size() <= tensor.info()->total_size()) - { - _tensor.allocator()->import_memory(tensor.cl_buffer()); - } - } - - CLAuxTensorHandler(const CLAuxTensorHandler &) = delete; - CLAuxTensorHandler &operator=(const CLAuxTensorHandler) = delete; - - ~CLAuxTensorHandler() - { - if(_injected_tensor_pack) - { - _injected_tensor_pack->remove_tensor(_injected_slot_id); - } - } - - ICLTensor *get() - { - return &_tensor; - } - - ICLTensor *operator()() - { - return &_tensor; - } - -private: - CLTensor _tensor{}; - ITensorPack *_injected_tensor_pack{ nullptr }; - int _injected_slot_id{ TensorType::ACL_UNKNOWN }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H */
\ No newline at end of file |