From 237be03ada644232b19005f44742b97efa5dbfc6 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 25 Jan 2021 15:44:02 +0000 Subject: Make Gpu Elementwise Operations kernels and functions state-less Partially resolves COMPMID-4004 Change-Id: Ie99b6c676af5be4c2a67affc06fec378f3dff8ec Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4918 Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- Android.bp | 1 + .../runtime/CL/functions/CLElementwiseOperations.h | 183 -------------------- .../CL/functions/CLElementwiseOperations.cpp | 175 ++++--------------- .../gpu/cl/operators/ClElementwiseOperations.cpp | 92 ++++++++++ .../gpu/cl/operators/ClElementwiseOperations.h | 190 +++++++++++++++++++++ 5 files changed, 317 insertions(+), 324 deletions(-) create mode 100644 src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp create mode 100644 src/runtime/gpu/cl/operators/ClElementwiseOperations.h diff --git a/Android.bp b/Android.bp index 7cfa817193..a1898b5432 100644 --- a/Android.bp +++ b/Android.bp @@ -797,6 +797,7 @@ cc_library_static { "src/runtime/gpu/cl/operators/ClActivation.cpp", "src/runtime/gpu/cl/operators/ClAdd.cpp", "src/runtime/gpu/cl/operators/ClConcatenate.cpp", + "src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp", "src/runtime/gpu/cl/operators/ClFloor.cpp", "src/runtime/gpu/cl/operators/ClSub.cpp", "utils/CommonGraphOptions.cpp", diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index c8c7e0c587..2b291517f3 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -33,189 +33,6 @@ class ICLTensor; class CLCompileContext; class ITensorInfo; -namespace experimental -{ -/** Basic function to run @ref arm_compute::opencl::kernels::ClSaturatedArithmeticKernel for division - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an arithmetic division between two tensors. - */ -class CLArithmeticDivision : public ICLOperator -{ -public: - /** Default Constructor */ - CLArithmeticDivision(); - /** Initialise the kernel's inputs, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; - -/** Basic function to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for max - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a max operation between two tensors. - */ -class CLElementwiseMax : public ICLOperator -{ -public: - /** Default Constructor */ - CLElementwiseMax(); - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for max - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. - * @param[in] output Output tensor info. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; - -/** Basic function to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for min - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a max operation between two tensors. - */ -class CLElementwiseMin : public ICLOperator -{ -public: - /** Default Constructor */ - CLElementwiseMin(); - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. 
- * @param[out] output Output tensor. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for min - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. - * @param[in] output Output tensor info. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; - -/** Basic function to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for squared difference - * - * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32. - * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 - */ -class CLElementwiseSquaredDiff : public ICLOperator -{ -public: - /** Default Constructor */ - CLElementwiseSquaredDiff(); - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for squared difference - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. - * @param[in] output Output tensor info. Data types supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; - -/** Basic function to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for power - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) - */ -class CLElementwisePower : public ICLOperator -{ -public: - /** Default Constructor */ - CLElementwisePower(); - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported:F16/F32. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for power - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace experimental - /** Basic function to run @ref opencl::kernels::ClSaturatedArithmeticKernel for addition * * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. 
diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp
index 9b809eebc7..60c699cbb8 100644
--- a/src/runtime/CL/functions/CLElementwiseOperations.cpp
+++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp
@@ -23,124 +23,17 @@
  */
 #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
 #include "src/runtime/gpu/cl/operators/ClAdd.h"
+#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h"
 #include "src/runtime/gpu/cl/operators/ClSub.h"
-#include
-
 namespace arm_compute
 {
-namespace experimental
-{
-CLArithmeticDivision::CLArithmeticDivision()
-{
-}
-
-void CLArithmeticDivision::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::DIV, input1, input2, output, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info);
-}
-
-void CLArithmeticDivision::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-
-CLElementwiseMax::CLElementwiseMax()
-{
-}
-
-void CLElementwiseMax::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::MAX, input1, input2, output, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info);
-}
-
-void CLElementwiseMax::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-
-CLElementwiseMin::CLElementwiseMin()
-{
-}
-
-void CLElementwiseMin::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::MIN, input1, input2, output, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info);
-}
-
-void CLElementwiseMin::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-
-CLElementwiseSquaredDiff::CLElementwiseSquaredDiff()
-{
-}
-
-void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
-}
-
-void CLElementwiseSquaredDiff::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-
-CLElementwisePower::CLElementwisePower()
-{
-}
-
-void CLElementwisePower::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::POWER, input1, input2, output, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info);
-}
-
-void CLElementwisePower::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-} // namespace experimental
 struct CLArithmeticAddition::Impl
 {
     const ICLTensor *src_0{ nullptr };
     const ICLTensor *src_1{ nullptr };
     ICLTensor       *dst{ nullptr };
     std::unique_ptr<opencl::ClAdd> op{ nullptr };
 };
@@ -235,10 +128,10 @@ void CLArithmeticSubtraction::run()
 struct CLArithmeticDivision::Impl
 {
-    const ICLTensor *src_0{ nullptr };
-    const ICLTensor *src_1{ nullptr };
-    ICLTensor       *dst{ nullptr };
-    std::unique_ptr<experimental::CLArithmeticDivision> op{ nullptr };
+    const ICLTensor *src_0{ nullptr };
+    const ICLTensor *src_1{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClElementwiseDivision> op{ nullptr };
 };
 CLArithmeticDivision::CLArithmeticDivision()
@@ -259,13 +152,13 @@ void CLArithmeticDivision::configure(const CLCompileContext &compile_context, co
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLArithmeticDivision>();
+    _impl->op    = std::make_unique<opencl::ClElementwiseDivision>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLArithmeticDivision::validate(input1, input2, output, act_info);
+    return opencl::ClElementwiseDivision::validate(input1, input2, output, act_info);
 }
 void CLArithmeticDivision::run()
@@ -280,10 +173,10 @@ void CLArithmeticDivision::run()
 struct CLElementwiseMax::Impl
 {
-    const ICLTensor *src_0{ nullptr };
-    const ICLTensor *src_1{ nullptr };
-    ICLTensor       *dst{ nullptr };
-    std::unique_ptr<experimental::CLElementwiseMax> op{ nullptr };
+    const ICLTensor *src_0{ nullptr };
+    const ICLTensor *src_1{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClElementwiseMax> op{ nullptr };
 };
 CLElementwiseMax::CLElementwiseMax()
@@ -304,13 +197,13 @@ void CLElementwiseMax::configure(const CLCompileContext &compile_context, ICLTen
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLElementwiseMax>();
+    _impl->op    = std::make_unique<opencl::ClElementwiseMax>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLElementwiseMax::validate(input1, input2, output, act_info);
+    return opencl::ClElementwiseMax::validate(input1, input2, output, act_info);
 }
 void CLElementwiseMax::run()
@@ -325,10 +218,10 @@ void CLElementwiseMax::run()
 struct CLElementwiseMin::Impl
 {
-    const ICLTensor *src_0{ nullptr };
-    const ICLTensor *src_1{ nullptr };
-    ICLTensor       *dst{ nullptr };
-    std::unique_ptr<experimental::CLElementwiseMin> op{ nullptr };
+    const ICLTensor *src_0{ nullptr };
+    const ICLTensor *src_1{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClElementwiseMin> op{ nullptr };
 };
 CLElementwiseMin::CLElementwiseMin()
@@ -349,13 +242,13 @@ void CLElementwiseMin::configure(const CLCompileContext &compile_context, ICLTen
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLElementwiseMin>();
+    _impl->op    = std::make_unique<opencl::ClElementwiseMin>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLElementwiseMin::validate(input1, input2, output, act_info);
+    return opencl::ClElementwiseMin::validate(input1, input2, output, act_info);
 }
 void CLElementwiseMin::run()
@@ -370,10 +263,10 @@ void CLElementwiseMin::run()
 struct CLElementwiseSquaredDiff::Impl
 {
-    const ICLTensor *src_0{ nullptr };
-    const ICLTensor *src_1{ nullptr };
-    ICLTensor       *dst{ nullptr };
-    std::unique_ptr<experimental::CLElementwiseSquaredDiff> op{ nullptr };
+    const ICLTensor *src_0{ nullptr };
+    const ICLTensor *src_1{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClElementwiseSquaredDiff> op{ nullptr };
 };
 CLElementwiseSquaredDiff::CLElementwiseSquaredDiff()
@@ -394,13 +287,13 @@ void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLElementwiseSquaredDiff>();
+    _impl->op    = std::make_unique<opencl::ClElementwiseSquaredDiff>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLElementwiseSquaredDiff::validate(input1, input2, output, act_info);
+    return opencl::ClElementwiseSquaredDiff::validate(input1, input2, output, act_info);
 }
 void CLElementwiseSquaredDiff::run()
@@ -415,10 +308,10 @@ void CLElementwiseSquaredDiff::run()
 struct CLElementwisePower::Impl
 {
-    const ICLTensor *src_0{ nullptr };
-    const ICLTensor *src_1{ nullptr };
-    ICLTensor       *dst{ nullptr };
-    std::unique_ptr<experimental::CLElementwisePower> op{ nullptr };
+    const ICLTensor *src_0{ nullptr };
+    const ICLTensor *src_1{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClElementwisePower> op{ nullptr };
 };
 CLElementwisePower::CLElementwisePower()
@@ -439,13 +332,13 @@ void CLElementwisePower::configure(const CLCompileContext &compile_context, ICLT
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLElementwisePower>();
+    _impl->op    = std::make_unique<opencl::ClElementwisePower>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLElementwisePower::validate(input1, input2, output, act_info);
+    return opencl::ClElementwisePower::validate(input1, input2, output, act_info);
 }
 void CLElementwisePower::run()
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
new file mode 100644
index 0000000000..e5b836a0d8
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h"
+
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    auto k = std::make_unique<kernels::ClArithmeticKernel>();
+    k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info);
+    _kernel = std::move(k);
+}
+
+Status ClElementwiseDivision::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, src1, src2, dst, act_info);
+}
+
+void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    auto k = std::make_unique<kernels::ClArithmeticKernel>();
+    k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info);
+    _kernel = std::move(k);
+}
+
+Status ClElementwiseMax::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, src1, src2, dst, act_info);
+}
+
+void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    auto k = std::make_unique<kernels::ClArithmeticKernel>();
+    k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info);
+    _kernel = std::move(k);
+}
+
+Status ClElementwiseMin::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, src1, src2, dst, act_info);
+}
+
+void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    auto k = std::make_unique<kernels::ClArithmeticKernel>();
+    k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
+    _kernel = std::move(k);
+}
+
+Status ClElementwiseSquaredDiff::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
+}
+
+void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    auto k = std::make_unique<kernels::ClArithmeticKernel>();
+    k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info);
+    _kernel = std::move(k);
+}
+
+Status ClElementwisePower::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
+{
+    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, src1, src2, dst, act_info);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
new file mode 100644
index 0000000000..b9ab1405c8
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
+#define ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for division
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an arithmetic division between two tensors.
+ */
+class ClElementwiseDivision : public IClOperator
+{
+public:
+    /** Default Constructor */
+    ClElementwiseDivision() = default;
+    /** Configure function for a given list of arguments.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor info. Data types supported: F16/F32.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref ClElementwiseDivision
+     *
+     * @param[in] src1     First source tensor info. Data types supported: F16/F32.
+     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
+     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+};
+
+/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for max
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @note The function performs a max operation between two tensors.
+ */
+class ClElementwiseMax : public IClOperator
+{
+public:
+    /** Default Constructor */
+    ClElementwiseMax() = default;
+    /** Configure function for a given list of arguments.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for max
+     *
+     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
+     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+};
+
+/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for min
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @note The function performs a min operation between two tensors.
+ */
+class ClElementwiseMin : public IClOperator
+{
+public:
+    /** Default Constructor */
+    ClElementwiseMin() = default;
+    /** Configure function for a given list of arguments.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for min
+     *
+     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
+     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+};
+
+/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for squared difference
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32.
+ * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
+ */
+class ClElementwiseSquaredDiff : public IClOperator
+{
+public:
+    /** Default Constructor */
+    ClElementwiseSquaredDiff() = default;
+    /** Configure function for a given list of arguments.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for squared difference
+     *
+     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
+     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+};
+
+/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for power
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
+ */
+class ClElementwisePower : public IClOperator
+{
+public:
+    /** Default Constructor */
+    ClElementwisePower() = default;
+    /** Configure function for a given list of arguments.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor info. Data types supported: F16/F32.
+     * @param[in]  src2            Second source tensor info. Data types supported: F16/F32.
+     * @param[out] dst             Destination tensor info. Data types supported: F16/F32.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + */ + void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for power + * + * @param[in] src1 First source tensor info. Data types supported: F16/F32. + * @param[in] src2 Second source tensor info. Data types supported: F16/F32. + * @param[in] dst Destination tensor info. Data types supported: F16/F32. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + * @return a status + */ + static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); +}; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H */ -- cgit v1.2.1
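Usage sketch (not from this commit, added for illustration): with this change the Gpu elementwise operators hold no tensor state; configure() consumes only ITensorInfo metadata and the actual tensors are handed over at execution time through an ITensorPack, which is what the rewritten CL* functions above do internally through their Impl structs. The snippet below sketches that calling pattern for the new opencl::ClElementwiseDivision, assuming src0, src1 and dst are CLTensors already created and allocated with compatible F16/F32 shapes; the helper name run_division is made up for the example.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"

#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h"

using namespace arm_compute;

// Hypothetical helper: computes dst = src0 / src1 with the state-less operator.
void run_division(CLTensor &src0, CLTensor &src1, CLTensor &dst)
{
    opencl::ClElementwiseDivision div_op;

    // Configuration uses tensor metadata only; the operator captures no tensors.
    div_op.configure(CLKernelLibrary::get().get_compile_context(),
                     src0.info(), src1.info(), dst.info());

    // Tensors are supplied at run time via a pack, so one configured operator can
    // be reused across calls and across tensors with matching TensorInfo.
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, &src0);
    pack.add_tensor(TensorType::ACL_SRC_1, &src1);
    pack.add_tensor(TensorType::ACL_DST, &dst);
    div_op.run(pack);
}

Since ClElementwiseOperations.h lives under src/, this pattern is only reachable from code built inside the library; external users keep going through the unchanged CLArithmeticDivision, CLElementwiseMax, CLElementwiseMin, CLElementwiseSquaredDiff and CLElementwisePower functions.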