diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu')
5 files changed, 118 insertions, 31 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 9a218b3e75..2611d6d575 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -37,7 +37,8 @@ namespace { std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops { - ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD + ElementwiseBinaryCommonAttributes::ElementwiseOp::Add, + ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul }; } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index a02160cba8..e7ee1c10df 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -22,8 +22,8 @@ * SOFTWARE. */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" namespace arm_compute @@ -36,9 +36,13 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs) { - // Set the elementwise operation to ADD then call the elementwise common validate_op + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Add then call the elementwise common validate_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } @@ -46,9 +50,13 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs) { - // Set the elementwise operation to ADD then call the elementwise common is_supported_op + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Add then call the elementwise common is_supported_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } @@ -57,9 +65,9 @@ ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, ITensorInfo *rhs) { // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() - // Set the elementwise operation to ADD then call the elementwise common create_op + // Set the elementwise operation to Add then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp new file mode 100644 index 0000000000..464a32cbad --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Mul then call the elementwise common validate_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); +} + +Status GpuMul::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Mul then call the elementwise common is_supported_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); +} + +ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) +{ + // Set the elementwise operation to Mul then call the elementwise common create_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index cbefa379e6..0b58b6eb96 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,11 +25,12 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { +/** Forward declaration */ class ITensorInfo; + namespace experimental { namespace dynamic_fusion @@ -39,14 +40,15 @@ class ElementwiseBinaryCommonAttributes public: enum class ElementwiseOp { - ADD, /**< (x + y) */ - SUB, /**< (x - y) */ - DIV, /**< (x / y) */ - MIN, /**< Min(x, y) */ - MAX, /**< Max(x, y) */ - SQUARED_DIFF, /**< (x - y)^2 */ - POWER, /**< x ^ y */ - PRELU, /**< y*x if x < 0, x otherwise */ + Add, /**< (x + y) */ + Sub, /**< (x - y) */ + Div, /**< (x / y) */ + Mul, /**< (x * y) */ + Min, /**< Min(x, y) */ + Max, /**< Max(x, y) */ + SquaredDiff, /**< (x - y)^2 */ + Power, /**< x ^ y */ + Prelu, /**< y*x if x < 0, x otherwise */ }; /** Set operation*/ ElementwiseBinaryCommonAttributes &operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation); diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp index 01017ed909..0dd7ca5e78 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp @@ -68,7 +68,7 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup code = R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- )_"; if(is_root) @@ -139,7 +139,7 @@ R"_( code += R"_( } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- )_"; return code; @@ -168,33 +168,34 @@ void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtab TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const { - TagLUT lut{}; + TagLUT lut{}; // Local build options lut["meta_kernel_id"] = id(); lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type()); // Arguments and global shared variables - lut["lhs"] = vtable.get_variable(_lhs); - lut["rhs"] = vtable.get_variable(_rhs); - lut["dst"] = vtable.get_variable(_dst); + lut["lhs"] = vtable.get_variable(_lhs); + lut["rhs"] = vtable.get_variable(_rhs); + lut["dst"] = vtable.get_variable(_dst); lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); switch(_attributes.operation()) { - case Attributes::ElementwiseOp::ADD: + case Attributes::ElementwiseOp::Add: lut["ELTWISE_OP"] = "ADD"; break; + case Attributes::ElementwiseOp::Mul: + lut["ELTWISE_OP"] = "MUL"; + break; default: ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); } ARM_COMPUTE_ERROR_ON( - comp_group.is_intermediate_tensor(_lhs) && - detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); + comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); ARM_COMPUTE_ERROR_ON( - comp_group.is_intermediate_tensor(_rhs) && - detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); + comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); // Set broadcast parameters // PRE: All tensors are broadcast-compatible @@ -222,9 +223,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0"; lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1"; - lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : - (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : - ""; + lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : + (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : + ""; return lut; } |