From b1fcefddf3f59219a9d7930d607175b7e6c39347 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 15 Jun 2022 19:02:28 +0100 Subject: Implement new Elementwise Dynamic Fusion Operators: Div, Floor Resolves: COMPMID-5355 Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745 Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: SiCong Li --- Android.bp | 3 +- arm_compute/core/experimental/OperatorGraph.h | 33 +++- .../cl_fused_conv2d_elementwise_add.cpp | 12 +- filelist.json | 3 +- src/core/CL/cl_kernels/common/gemmlowp.cl | 4 +- src/core/CL/cl_kernels/nhwc/direct_convolution.cl | 4 +- .../CL/cl_kernels/nhwc/direct_convolution3d.cl | 4 +- src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl | 4 +- .../cl_kernels/nhwc/dwc_native_quantized_nhwc.cl | 4 +- .../cl_kernels/nhwc/winograd_output_transform.cl | 12 +- src/core/CL/cl_kernels/tile_helpers.h | 46 ++++- .../dynamic_fusion/ClKernelBuildingAPI.cpp | 20 +- .../dynamic_fusion/ClKernelBuildingAPI.h | 10 +- .../ClDirectConvolutionKernelComponent.cpp | 2 +- .../components/ClElementwiseAddKernelComponent.cpp | 207 ------------------- .../components/ClElementwiseAddKernelComponent.h | 75 ------- .../components/ClElementwiseKernelComponent.cpp | 219 +++++++++++++++++++++ .../components/ClElementwiseKernelComponent.h | 76 +++++++ .../components/ClFloorKernelComponent.cpp | 130 ++++++++++++ .../components/ClFloorKernelComponent.h | 74 +++++++ .../components/ClKernelComponents.h | 3 +- .../experimental/dynamic_fusion/OperatorGraph.cpp | 33 +++- .../WorkloadImpl/ClKernelDescriptors.h | 18 +- .../dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp | 63 +++++- .../dynamic_fusion/WorkloadImpl/ClKernelGraph.h | 30 ++- .../WorkloadImpl/OperatorGraphImpl.cpp | 55 +++++- .../WorkloadImpl/OperatorGraphImpl.h | 42 +++- .../CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp | 9 +- tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp | 135 +++++++++++++ .../Integration_OperatorFuseMovenetSubGraph1.cpp | 30 +-- 30 files changed, 982 insertions(+), 378 deletions(-) delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h create mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp create mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h create mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp create mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h create mode 100644 tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp diff --git a/Android.bp b/Android.bp index 9f6f8defdf..74c2b96c37 100644 --- a/Android.bp +++ b/Android.bp @@ -370,7 +370,8 @@ cc_library_static { "src/core/Version.cpp", "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp", "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp", - "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp", + "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp", + 
"src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp", "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp", "src/core/experimental/dynamic_fusion/OperatorGraph.cpp", "src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp", diff --git a/arm_compute/core/experimental/OperatorGraph.h b/arm_compute/core/experimental/OperatorGraph.h index fd8fcd5c47..cab83c7f8b 100644 --- a/arm_compute/core/experimental/OperatorGraph.h +++ b/arm_compute/core/experimental/OperatorGraph.h @@ -176,17 +176,18 @@ Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTen */ void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method); -/** Descriptor for Addition operation +/** Descriptor for Elementwise binary operation * */ -struct AddDescriptor +struct ElementwiseDescriptor { /* TOSA compliant attribute parameters start */ /* TOSA compliant attribute parameters end */ /* Non-TOSA compliant attribute parameters start */ + ArithmeticOperation op; /* Non-TOSA compliant attribute parameters end */ }; -/** Add op Add to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s +/** Add op Elementwise to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s * * @param[in,out] graph OperatorGraph where the operator is added to * @param[in] desc Operator descriptor @@ -196,12 +197,34 @@ struct AddDescriptor * * @return Operator */ -Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst); +Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst); + +/** Descriptor for Floor operation + * + */ +struct FloorDescriptor +{ + /* TOSA compliant attribute parameters start */ + /* TOSA compliant attribute parameters end */ + /* Non-TOSA compliant attribute parameters start */ + /* Non-TOSA compliant attribute parameters end */ +}; +/** Add op Floor to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s + * + * @param[in,out] graph OperatorGraph where the operator is added to + * @param[in] desc Operator descriptor + * @param[in] src Source OpTensor + * @param[in] dst Destination OpTensor + * + * @return Operator + */ +Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst); bool operator==(const OpTensor &t0, const OpTensor &t1); bool operator==(const Padding2D &pad0, const Padding2D &pad1); bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1); -bool operator==(const AddDescriptor &, const AddDescriptor &); +bool operator==(const ElementwiseDescriptor &, const ElementwiseDescriptor &); +bool operator==(const FloorDescriptor &, const FloorDescriptor &); } // namespace dynamic_fusion } // namespace experimental diff --git a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp index 285509b586..8aee67e8a4 100644 --- a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp +++ b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp @@ -218,12 +218,12 @@ public: /// @snippet dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp Add Elementwise Add Operator // [Add Elementwise Add Operator] - auto t_l1_addend_info = TensorInfo(t_l1_addend_shape, 1, data_type, data_layout); - auto 
t_dst_info = TensorInfo(); - const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info); - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - AddDescriptor add_desc{}; - add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); + auto t_l1_addend_info = TensorInfo(t_l1_addend_shape, 1, data_type, data_layout); + auto t_dst_info = TensorInfo(); + const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info); + const auto op_t_dst = add_tensor(op_graph, t_dst_info); + ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD }; + add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); // [Add Elementwise Add Operator] /// @page example_dynamic_fusion_cl_conv2d_elementwise_add diff --git a/filelist.json b/filelist.json index 681cacdf02..e22ec1d18f 100644 --- a/filelist.json +++ b/filelist.json @@ -2097,7 +2097,8 @@ "dynamic_fusion": [ "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp", "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp", - "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp", + "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp", + "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp", "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp", "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp", diff --git a/src/core/CL/cl_kernels/common/gemmlowp.cl b/src/core/CL/cl_kernels/common/gemmlowp.cl index 9fad66df68..53ce296948 100644 --- a/src/core/CL/cl_kernels/common/gemmlowp.cl +++ b/src/core/CL/cl_kernels/common/gemmlowp.cl @@ -703,7 +703,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t a_offset_s32[0].v *= A_OFFSET; - T_ADD_BROADCAST_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32); + T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32); #endif // defined(A_OFFSET) #if defined(B_OFFSET) @@ -728,7 +728,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t T_LOAD(int, 1, N0, BUFFER, biases, xo, 0, 1, 0, bias); - T_ADD_BROADCAST_X(int, M0, N0, offset_s32, bias, offset_s32); + T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, bias, offset_s32); #endif // defined(ADD_BIAS) LOOP_UNROLLING(int, i, 0, 1, M0, diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl index f1b422a68f..d34e24b436 100644 --- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl +++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -233,7 +233,7 @@ __kernel void direct_convolution_nhwc( T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout, 0, 1, 0, bias0); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); + T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); #endif // HAS_BIAS diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl index 587f3984ab..807b990e82 100644 --- a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl +++ b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -254,7 +254,7 @@ __kernel void direct_convolution3d_ndhwc( } // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); + T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); #endif // HAS_BIAS diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl index 4f57a81e7b..b24a6ae85f 100644 --- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl +++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -175,7 +175,7 @@ __kernel void dwc_native_fp_nhwc( T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, (cout * DEPTH_MULTIPLIER) + d, 0, 0, 0, bias0); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); + T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); #endif // HAS_BIAS T_ACTIVATION(ACC_DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, c, c); diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl index ec2593af71..263a23ef28 100644 --- a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl +++ b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -236,7 +236,7 @@ __kernel void dwc_native_quantized_nhwc( T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout * DEPTH_MULTIPLIER + d, 0, 0, 0, bias0); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); + T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c); #endif // HAS_BIAS T_LOAD_MULTIPLIERS_SHIFT(QUANTIZATION_TYPE); diff --git a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl index bab2ee850c..0883cd99c8 100644 --- a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl +++ b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl @@ -111,7 +111,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); - T_ADD_BROADCAST_X(DATA_TYPE, 2, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 2, N0, out, b, out); #endif // defined(HAS_BIAS) T_ACTIVATION(DATA_TYPE, 2, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); @@ -177,7 +177,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); - T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out); #endif // defined(HAS_BIAS) T_ACTIVATION(DATA_TYPE, 4, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); @@ -287,7 +287,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out); #endif // HAS_BIAS int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W; @@ -374,7 +374,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out); #endif // HAS_BIAS int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W; @@ -488,7 +488,7 @@ __kernel void 
winograd_output_transform_4x4_5x5_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out); #endif // HAS_BIAS int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W; @@ -586,7 +586,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out); + T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out); #endif // HAS_BIAS int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W; diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index 69898e1bb4..0ce343e3ec 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -970,6 +970,9 @@ #define ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) #define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) +#define T_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL)) +#define T_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL)) + /** Element-wise activation for quantized types * * @note Performs: activation(LHS) = DST @@ -1014,11 +1017,15 @@ }) \ }) -/** Element-wise addition with RHS broadcasted (RHS has the X dimension only) +#define T_ELTWISE_BROADCAST_ADD_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) + +/** Element-wise operation with RHS broadcasted (RHS has the X dimension only) * - * @note Performs: LHS + RHS[broadcasted] = DST + * @note Performs: LHS OP RHS[broadcasted] = DST * @note Both tiles must have same data type * + * @param[in] T_ELWISE_OP Elementwise operator to perform * @param[in] DST_DATA_TYPE DST data type * @param[in] M0 Number of LHS rows * @param[in] N0 Number of LHS columns @@ -1026,19 +1033,23 @@ * @param[in] rhs RHS tile * @param[out] dst DST tile */ -#define T_ADD_BROADCAST_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \ +#define T_ELTWISE_BROADCAST_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \ ({ \ LOOP_UNROLLING(int, _m0, 0, 1, M0, \ { \ - dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \ + dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \ }) \ }) -/** Element-wise addition between two tiles (LHS and RHS) +#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) + +/** Element-wise operation between two tiles (LHS and RHS) * - * @note Performs: LHS + RHS = DST + * @note Performs: LHS OP RHS = DST * @note Both tiles must have same data type * + * @param[in] T_ELWISE_OP Elementwise operator to perform * @param[in] DST_DATA_TYPE DST data type * @param[in] M0 Number of LHS rows * @param[in] N0 Number of LHS columns @@ -1046,11 +1057,30 @@ * @param[in] rhs RHS tile * @param[out] dst DST tile */ -#define T_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \ +#define T_ELTWISE(T_ELWISE_OP, DST_DATA_TYPE, M0, 
N0, lhs, rhs, dst) \
+    ({ \
+        LOOP_UNROLLING(int, _m0, 0, 1, M0, \
+        { \
+            dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
+        }) \
+    })
+
+/** Floor operation on a tile
+ *
+ * @note Performs: floor(SRC) = DST
+ * @note Both tiles must have same data type
+ *
+ * @param[in] DST_DATA_TYPE DST data type
+ * @param[in] M0            Number of SRC rows
+ * @param[in] N0            Number of SRC columns
+ * @param[in] src           SRC tile
+ * @param[out] dst          DST tile
+ */
+#define T_FLOOR(DST_DATA_TYPE, M0, N0, src, dst) \
     ({ \
         LOOP_UNROLLING(int, _m0, 0, 1, M0, \
         { \
-            dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+            dst[_m0].v = floor(CONVERT(src[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
         }) \
     })
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
index 0be2ba02b5..9b6daae619 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
@@ -55,18 +55,32 @@ Status add_tensor(ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info,
     return Status{};
 }
 
-Status add_kcomp_eltwise_add(ClKernelBlueprint &kernel_blueprint, const ClEltwiseAddKernelDescriptor &,
-                             ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
+Status add_kcomp_eltwise_op(ClKernelBlueprint &kernel_blueprint, const ClElementwiseKernelDescriptor &desc,
+                            ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
 {
     kernel_blueprint.impl().add_component(
-        std::make_unique<ClElementwiseAddKernelComponent>(
+        std::make_unique<ClElementwiseKernelComponent>(
             &kernel_blueprint,
+            desc,
             SharedVarLink{ src0_id, SharedVarIO::Input },
             SharedVarLink{ src1_id, SharedVarIO::Input },
             SharedVarLink{ dst_id, SharedVarIO::Output }));
 
     return Status{};
 }
+
+Status add_kcomp_floor(ClKernelBlueprint &kernel_blueprint, const ClFloorKernelDescriptor &,
+                       ArgumentID src_id, ArgumentID &dst_id)
+{
+    kernel_blueprint.impl().add_component(
+        std::make_unique<ClFloorKernelComponent>(
+            &kernel_blueprint,
+            SharedVarLink{ src_id, SharedVarIO::Input },
+            SharedVarLink{ dst_id, SharedVarIO::Output }));
+
+    return Status{};
+}
+
 Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID, ArgumentID &)
 {
     return Status{};
 }
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
index 067e9737e3..463fc5e7cf 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
@@ -59,9 +59,13 @@ public:
 };
 
 ///// Kernel Components /////
-/** Component: Eltwise Add */
-Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id,
-                             ArgumentID src1_id, ArgumentID &dst_id);
+/** Component: Eltwise Operator */
+Status add_kcomp_eltwise_op(ClKernelBlueprint &, const ClElementwiseKernelDescriptor &, ArgumentID src0_id,
+                            ArgumentID src1_id, ArgumentID &dst_id);
+
+/** Component: Floor */
+Status add_kcomp_floor(ClKernelBlueprint &, const ClFloorKernelDescriptor &, ArgumentID src_id,
+                       ArgumentID &dst_id);
 
 /** Component: Activation */
 Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp index b63e2167b7..811cd79811 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp @@ -237,7 +237,7 @@ std::string ClDirectConvolutionKernelComponent::get_component_code() const T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); + T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); )_"; } diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp deleted file mode 100644 index 965a68f51d..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClElementwiseAddKernelComponent::get_component_type() const -{ - return ComponentType::Simple; -} - -std::set ClElementwiseAddKernelComponent::get_headers_list() const -{ - return std::set { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; -} - -Window ClElementwiseAddKernelComponent::get_window() const -{ - const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id); - const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info); - - const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info); - const TensorShape &out_shape = broadcast_pair.first; - - auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type()); - - const unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); - Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration)); - - return win; -} - -std::string ClElementwiseAddKernelComponent::get_component_code() const -{ - std::string code; - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - - if(is_root) - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- - // IN_0(LHS) {{lhs}} - // IN_1(RHS) {{rhs}} - // OUT(dst, accum) {{dst}} - - // dst = lhs + rhs (mix-precision, broadcast, boundary aware) - TILE({{DATA_TYPE}}, M0, N0, {{dst}}); - { - TILE({{DATA_TYPE}}, M0, N0, lhs_tile); - TILE({{DATA_TYPE}}, M0, N0, rhs_tile); - - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile); - -#if defined(IS_BROADCAST) - T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#else // !defined(IS_BROADCAST) - T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#endif // defined(IS_BROADCAST) - - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- -)_"; - } - else - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- - // IN_0/Out(Accumulator) {{acc}} - // IN_1(Addend) {{addend}} - - // acc = addend + acc (mix-precision, broadcast, boundary aware) - { - TILE({{DATA_TYPE}}, M0, N0, addend_tile); - - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile); - -#if defined(IS_BROADCAST) - T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#else // !defined(IS_BROADCAST) - T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#endif // defined(IS_BROADCAST) - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- -)_"; - } -} - 
-CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const -{ - const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape(); - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST"); - - return build_opts; -} - -std::string ClElementwiseAddKernelComponent::generate_config_id() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - std::string config_id{}; - config_id += lower_string(string_from_data_type(t_dst_info->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); - return config_id; -} - -void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs"); - vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs"); - if(is_root) - { - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); - } -} - -ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // Arguments and global shared variables - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - if(is_root) - { - lut["lhs"] = vtable.get(_lhs); - lut["rhs"] = vtable.get(_rhs); - lut["dst"] = vtable.get(_dst); - } - else - { - // Determine which link is the accumulator - Link accumulator; - Link addend; - if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _lhs; - addend = _rhs; - } - else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _rhs; - addend = _lhs; - } - else - { - ARM_COMPUTE_ERROR("Invalid elementwise component linking"); - } - lut["acc"] = vtable.get(accumulator); - lut["addend"] = vtable.get(addend); - } - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h 
b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h deleted file mode 100644 index 5f8b1569ac..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClElementwiseAddKernelComponent : public IClKernelComponent -{ -public: - ClElementwiseAddKernelComponent(ClKernelBlueprint *blueprint, const Link &lhs, const Link &rhs, const Link &dst) - : IClKernelComponent(blueprint), _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set get_headers_list() const override; - std::string get_component_code() const override; - Window get_window() const override; - CLBuildOptions generate_build_options() const override; - std::string generate_config_id() const override; - - virtual std::vector get_links() const override - { - return { _lhs, _rhs, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "eltwise_add_" + std::to_string(id()); - } - -private: - Link _lhs{}; - Link _rhs{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp new file mode 100644 index 0000000000..24a9eee9a3 --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp @@ -0,0 +1,219 @@ +/* + * Copyright 
(c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" +#include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +ComponentType ClElementwiseKernelComponent::get_component_type() const +{ + return ComponentType::Simple; +} + +std::set ClElementwiseKernelComponent::get_headers_list() const +{ + return std::set { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; +} + +Window ClElementwiseKernelComponent::get_window() const +{ + const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id); + const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); + ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info); + + const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info); + const TensorShape &out_shape = broadcast_pair.first; + + auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type()); + + const unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); + Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration)); + + return win; +} + +std::string ClElementwiseKernelComponent::get_component_code() const +{ + std::string code; + const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; + + if(is_root) + { + return R"_( + //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + // IN_0(LHS) {{lhs}} + // IN_1(RHS) {{rhs}} + // OUT(dst, accum) {{dst}} + + // dst = lhs + rhs (mix-precision, broadcast, boundary aware) + TILE({{DATA_TYPE}}, M0, N0, {{dst}}); + { + TILE({{DATA_TYPE}}, M0, N0, lhs_tile); + TILE({{DATA_TYPE}}, M0, N0, rhs_tile); + + T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); + T_LOAD({{DATA_TYPE}}, M0, N0, 
BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile); + +#if defined(IS_BROADCAST) + T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); +#else // !defined(IS_BROADCAST) + T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); +#endif // defined(IS_BROADCAST) + + } + //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- +)_"; + } + else + { + return R"_( + //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + // IN_0/Out(Accumulator) {{acc}} + // IN_1(Addend) {{addend}} + + // acc = addend + acc (mix-precision, broadcast, boundary aware) + { + TILE({{DATA_TYPE}}, M0, N0, addend_tile); + + T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile); + +#if defined(IS_BROADCAST) + T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); +#else // !defined(IS_BROADCAST) + T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); +#endif // defined(IS_BROADCAST) + } + //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- +)_"; + } +} + +CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const +{ + const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); + const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + + CLBuildOptions build_opts{}; + const auto n0 = _blueprint->impl().get_execution_window().x().step(); + const auto m0 = _blueprint->impl().get_execution_window().y().step(); + const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape(); + + build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); + build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); + build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST"); + + return build_opts; +} + +std::string ClElementwiseKernelComponent::generate_config_id() const +{ + auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + std::string config_id{}; + config_id += lower_string(string_from_data_type(t_dst_info->data_type())); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(0)); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(1)); + config_id += "_"; + config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); + return config_id; +} + +void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const +{ + const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; + vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs"); + vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs"); + if(is_root) + { + vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); + } +} + +ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const +{ + TagLUT lut{}; + const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + // Arguments and global shared variables + const bool is_root = 
_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; + if(is_root) + { + lut["lhs"] = vtable.get(_lhs); + lut["rhs"] = vtable.get(_rhs); + lut["dst"] = vtable.get(_dst); + } + else + { + // Determine which link is the accumulator + Link accumulator; + Link addend; + if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic) + { + accumulator = _lhs; + addend = _rhs; + } + else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic) + { + accumulator = _rhs; + addend = _lhs; + } + else + { + ARM_COMPUTE_ERROR("Invalid elementwise component linking"); + } + lut["acc"] = vtable.get(accumulator); + lut["addend"] = vtable.get(addend); + } + // Local build options + lut["meta_kernel_id"] = id(); + lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); + + switch(_desc.eltwise.op) + { + case ArithmeticOperation::DIV: + lut["ELTWISE_OP"] = "DIV"; + break; + case ArithmeticOperation::ADD: + lut["ELTWISE_OP"] = "ADD"; + break; + default: + ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); + } + return lut; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h new file mode 100644 index 0000000000..91b14ffafa --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class ClElementwiseKernelComponent : public IClKernelComponent +{ +public: + ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst) + : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } + { + } + + ComponentType get_component_type() const override; + std::set get_headers_list() const override; + std::string get_component_code() const override; + Window get_window() const override; + CLBuildOptions generate_build_options() const override; + std::string generate_config_id() const override; + + virtual std::vector get_links() const override + { + return { _lhs, _rhs, _dst }; + } + + virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; + virtual void allocate_shared_vars(SharedVarTable &vtable) const override; + + virtual std::string name() const override + { + return "eltwise_add_" + std::to_string(id()); + } + +private: + ClElementwiseKernelDescriptor _desc{}; + Link _lhs{}; + Link _rhs{}; + Link _dst{}; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp new file mode 100644 index 0000000000..87cc110561 --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" +#include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +ComponentType ClFloorKernelComponent::get_component_type() const +{ + return ComponentType::Simple; +} + +std::set ClFloorKernelComponent::get_headers_list() const +{ + return std::set { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; +} + +Window ClFloorKernelComponent::get_window() const +{ + const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); + ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + + ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info); + auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type()); + + const unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); + Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration)); + + return win; +} + +std::string ClFloorKernelComponent::get_component_code() const +{ + return R"_( + //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- + // IN_0(src) {{src}} + // OUT(dst, accum) {{dst}} + TILE({{DATA_TYPE}}, M0, N0, {{dst}}); + { + TILE({{DATA_TYPE}}, M0, N0, src_tile); + + T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile); + T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}}); + } + + //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- +)_"; +} + +CLBuildOptions ClFloorKernelComponent::generate_build_options() const +{ + CLBuildOptions build_opts{}; + + const auto n0 = _blueprint->impl().get_execution_window().x().step(); + const auto m0 = _blueprint->impl().get_execution_window().y().step(); + + const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; + build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); + build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); + build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); + + return build_opts; +} + +std::string ClFloorKernelComponent::generate_config_id() const +{ + auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + std::string config_id{}; + config_id += lower_string(string_from_data_type(t_dst_info->data_type())); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(0)); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(1)); + config_id += "_"; + config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); + return config_id; +} + +void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const +{ + vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); + vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); +} + 
+ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const +{ + TagLUT lut{}; + const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + // Arguments and global shared variables + lut["src"] = vtable.get(_src); + lut["dst"] = vtable.get(_dst); + lut["meta_kernel_id"] = id(); + lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); + return lut; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h new file mode 100644 index 0000000000..5463e233d4 --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class ClFloorKernelComponent : public IClKernelComponent +{ +public: + ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) + : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } + { + } + + ComponentType get_component_type() const override; + std::set get_headers_list() const override; + std::string get_component_code() const override; + Window get_window() const override; + CLBuildOptions generate_build_options() const override; + std::string generate_config_id() const override; + + virtual std::vector get_links() const override + { + return { _src, _dst }; + } + + virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; + virtual void allocate_shared_vars(SharedVarTable &vtable) const override; + + virtual std::string name() const override + { + return "floor_" + std::to_string(id()); + } + +private: + Link _src{}; + Link _dst{}; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h index 26e50523a9..3f99dd5553 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h @@ -27,7 +27,8 @@ #define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H #include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h" +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" #include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" #endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H diff --git a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp index a335e5aada..bd88afdb47 100644 --- a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp +++ b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp @@ -198,7 +198,7 @@ void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMetho node->set_method(method); } -Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst) +Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst) { auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() }); check_dependency_graph_op_success(graph, id.first); @@ -224,7 +224,36 @@ Operator 
add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get()); tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get()); tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get()); - graph.impl()->add_node(id.second, desc, tensors); + graph.impl()->add_node(id.second, desc, tensors); + check_multiple_roots(graph); + + return op_node; +} + +Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst) +{ + auto id = graph.impl()->graph.add_operator({ src.id() }, { dst.id() }); + check_dependency_graph_op_success(graph, id.first); + + Operator op_node(id.second); + + // Infer TensorInfo + auto node_src = graph.impl()->tensors[src.id()]->get_tensor_info(); + OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get(); + + if(node_dst->get_tensor_info()->total_size() == 0) + { + auto_init_if_empty(*(node_dst->get_tensor_info()), *node_src); + } + + // Check execution space + auto dst_info = node_dst->get_tensor_info(); + check_execution_shape(graph, *dst_info); + + ITensorDescPack tensors; + tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[src.id()].get()); + tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get()); + graph.impl()->add_node(id.second, desc, tensors); check_multiple_roots(graph); return op_node; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h index a9ccf908f0..f10e97e3e9 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h @@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor Conv2dDescriptor conv2d{}; }; -struct ClEltwiseAddKernelDescriptor +struct ClElementwiseKernelDescriptor { - friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1) + friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1) { - return desc0.add == desc1.add; + return desc0.eltwise == desc1.eltwise; } - AddDescriptor add{}; + ElementwiseDescriptor eltwise{}; }; + +struct ClFloorKernelDescriptor +{ + friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1) + { + return desc0.floor == desc1.floor; + } + FloorDescriptor floor{}; +}; + struct ClActivationKernelDescriptor { friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &) diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp index de58ce70ed..cab51a2ce6 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp @@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; } -Status ClAddKernel::generate(ClKernelBlueprint &bp) const +Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const { const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0); const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const ArgumentID dst_id; add_tensor(bp, dst->desc, 
dst_id, dst->id);
 
-    add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id);
+    add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
     return Status{};
 }
 
-Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
+Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
 {
     // 1. Check validity
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
@@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con
     return Status{};
 }
 
-bool ClAddKernel::operator==(const ClKernel &other) const
+bool ClElementwiseKernel::operator==(const ClKernel &other) const
 {
-    const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other);
+    const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
+    return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
+}
+
+Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
+{
+    const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ArgumentID src_id;
+    add_tensor(bp, src->desc, src_id, src->id);
+    ArgumentID dst_id;
+    add_tensor(bp, dst->desc, dst_id, dst->id);
+
+    add_kcomp_floor(bp, desc, src_id, dst_id);
+    return Status{};
+}
+
+Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+    // 1. Check validity
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+    // Matching data type
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+    // Matching data layout
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+    // All tensor infos are initialized
+    ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+    // Device requirements are met
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+
+    // dst shape is correct
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
+
+    // 2. Check support level
+
+    // Data type
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
+
+    // Data layout
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+    return Status{};
+}
+
+bool ClFloorKernel::operator==(const ClKernel &other) const
+{
+    const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
     return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
 }
 
@@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
     }
     return kernels;
 }
+
 std::vector<ClKernel *> traverse(ClKernelGraph &graph)
 {
     std::vector<ClKernel *> kernels;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
index 54e01ea850..c3580cfaca 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
@@ -139,16 +139,16 @@ public:
     ClDirectConv2dKernelDescriptor desc{};
 };
 
-struct ClAddKernel : public ClKernel
+struct ClElementwiseKernel : public ClKernel
 {
 public:
     Complexity complexity() const override
     {
         return Complexity::Simple;
     }
-    ClAddKernel()           = default;
-    ~ClAddKernel() override = default;
-    ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+    ClElementwiseKernel()           = default;
+    ~ClElementwiseKernel() override = default;
+    ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
         : ClKernel{ graph, id, config, tensors }, desc{ desc }
     {
     }
@@ -156,7 +156,27 @@ public:
     bool operator==(const ClKernel &other) const override;
     Status generate(ClKernelBlueprint &bp) const override;
 
-    ClEltwiseAddKernelDescriptor desc{};
+    ClElementwiseKernelDescriptor desc{};
+};
+
+struct ClFloorKernel : public ClKernel
+{
+public:
+    Complexity complexity() const override
+    {
+        return Complexity::Simple;
+    }
+    ClFloorKernel()           = default;
+    ~ClFloorKernel() override = default;
+    ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+        : ClKernel{ graph, id, config, tensors }, desc{ desc }
+    {
+    }
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+    bool operator==(const ClKernel &other) const override;
+    Status generate(ClKernelBlueprint &bp) const override;
+
+    ClFloorKernelDescriptor desc{};
 };
 
 struct ClKernelGraph
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
index f971196729..274a2517bb 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
@@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1
     return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation);
 }
 
-bool operator==(const AddDescriptor &, const AddDescriptor &)
+bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
 {
-    return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same
+    return ed0.op == ed1.op; // Compare the ArithmeticOperation of the two ElementwiseDescriptor objects
+}
+
+bool operator==(const FloorDescriptor &, const FloorDescriptor &)
+{
+    return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same
 }
 
 bool Conv2dContent::operator==(const OperatorContent &other) const
@@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const
     return desc == converted.desc;
 }
 
-bool AddContent::operator==(const OperatorContent &other) const
+bool ElementwiseContent::operator==(const OperatorContent &other) const
+{
+    const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
+    return desc == converted.desc;
+}
+
+bool FloorContent::operator==(const OperatorContent &other) const
 {
-    const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other);
+    const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other);
     return desc == converted.desc;
 }
 
@@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
     return Status{};
 }
 
-Status AddContent::translate(ClKernelGraph &kernel_graph) const
+Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
 {
     const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
     const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const
     DependencyGraph::Id add_id;
     ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
 
-    st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc);
+    st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc);
     ARM_COMPUTE_RETURN_ON_ERROR(st);
 
-    st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id);
+    st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id);
     ARM_COMPUTE_RETURN_ON_ERROR(st);
     ARM_COMPUTE_UNUSED(add_id);
 
     return Status{};
 }
 
+Status FloorContent::translate(ClKernelGraph &kernel_graph) const
+{
+    const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+
+    ITensorDescPack<ClKernelTensor> tensors;
+
+    DependencyGraph::Id src_id;
+    auto                st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
+    ARM_COMPUTE_RETURN_ON_ERROR(st);
+    tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
+
+    DependencyGraph::Id dst_id;
+    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
+    ARM_COMPUTE_RETURN_ON_ERROR(st);
+    tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
+
+    DependencyGraph::Id add_id;
+    ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
+
+    st = ClFloorKernel::validate(src->desc, dst->desc);
+    ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+    st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id);
+    ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+    return Status{};
+}
+
 std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
 {
     std::vector<const OperatorContent *> ops;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
index 2786d610e1..b303cdb9fc 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
@@ -157,19 +157,19 @@ private:
     Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
 };
 
-class AddContent : public OperatorContent
+class ElementwiseContent : public OperatorContent
 {
 public:
-    AddContent() = default;
-    AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+    ElementwiseContent() = default;
+    ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
         : OperatorContent(graph, id, tensors), desc(desc)
     {
     }
-    ~AddContent() = default;
-    AddContent(const AddContent &) = default;
-    AddContent &operator=(const AddContent &) = default;
-    AddContent(AddContent &&) = default;
-    AddContent &operator=(AddContent &&) = default;
+    ~ElementwiseContent() = default;
+    ElementwiseContent(const ElementwiseContent &) = default;
+    ElementwiseContent &operator=(const ElementwiseContent &) = default;
+    ElementwiseContent(ElementwiseContent &&) = default;
+    ElementwiseContent &operator=(ElementwiseContent &&) = default;
     bool operator==(const OperatorContent &other) const override;
     OperatorComplexity complexity() const override
     {
@@ -178,7 +178,31 @@ public:
     Status translate(ClKernelGraph &kernel_graph) const override;
 
 private:
-    AddDescriptor desc{};
+    ElementwiseDescriptor desc{};
+};
+
+class FloorContent : public OperatorContent
+{
+public:
+    FloorContent() = default;
+    FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+        : OperatorContent(graph, id, tensors), desc(desc)
+    {
+    }
+    ~FloorContent() = default;
+    FloorContent(const FloorContent &) = default;
+    FloorContent &operator=(const FloorContent &) = default;
+    FloorContent(FloorContent &&) = default;
+    FloorContent &operator=(FloorContent &&) = default;
+    bool operator==(const OperatorContent &other) const override;
+    OperatorComplexity complexity() const override
+    {
+        return OperatorComplexity::Simple;
+    }
+    Status translate(ClKernelGraph &kernel_graph) const override;
+
+private:
+    FloorDescriptor desc{};
 };
 
 struct OperatorGraph::Implementation
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
index 96a845c36e..3ffbc077c6 100644
--- a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
+++ b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
@@ -74,8 +74,9 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
     ClExecutionDescriptor exec_desc{};
     Status                st{};
 
-    const auto data_type = DataType::F32;
-    const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
+    const auto data_type    = DataType::F32;
+    const auto conv_info    = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
+    const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD };
 
     const auto width  = 7U;
     const auto height = 6U;
@@ -99,7 +100,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
     const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ?
2U : 4U) : 1U;
 
     const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info };
-    const ClEltwiseAddKernelDescriptor   eltwise_add_desc{};
+    const ClElementwiseKernelDescriptor  eltwise_add_desc{ eltwise_info };
     const TileDescriptor                 store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT };
 
     ArgumentID src_id{ g_arg_placeholder };
@@ -119,7 +120,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
     st = add_tensor(bp, &dst_info, dst_id);
 
     st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id);
-    st = add_kcomp_eltwise_add(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
+    st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
     st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id);
 
     exec_desc.skip_sliding_window = true;
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp
new file mode 100644
index 0000000000..2b8f69e5e7
--- /dev/null
+++ b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#include "arm_compute/core/TensorInfo.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/experimental/ClWorkload.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/experimental/ClCompositeOperator.h"
+#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/validation/reference/Floor.h"
+#include "tests/validation/reference/Permute.h"
+
+#ifdef ARM_COMPUTE_ASSERTS_ENABLED
+#include "tests/SimpleTensorPrinter.h"
+#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
+
+using namespace arm_compute::experimental::dynamic_fusion;
+using namespace arm_compute::test::validation::utils;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_CASE(Operator_Floor_1_F32, framework::DatasetMode::ALL)
+{
+    /* Computation:
+     * out = floor(input)
+     */
+    const auto data_type    = DataType::F32;
+    const auto data_layout  = DataLayout::NHWC;
+    const auto t_shape      = TensorShape(32, 16);
+    auto       t_input_info = TensorInfo(t_shape, 1, data_type, data_layout);
+    auto       t_dst_info   = TensorInfo();
+
+    FloorDescriptor floor_desc{};
+
+    // Create reference
+    SimpleTensor<float> ref_t_input{ t_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
+
+    // Fill reference
+    fill<float>(ref_t_input, 0, library.get());
+
+    auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U));
+    auto t_dst_shape_nchw = t_shape;
+    permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+    auto       ref_t_dst_nchw = reference::floor_layer(ref_t_input_nchw);
+    const auto ref_t_dst      = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));
+
+    CLScheduler::get().default_reinit();
+    const auto    cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+    OperatorGraph op_graph;
+
+    const auto op_t_input = add_tensor(op_graph, t_input_info);
+    const auto op_t_dst   = add_tensor(op_graph, t_dst_info);
+
+    add_op_floor(op_graph, floor_desc, op_t_input, op_t_dst);
+
+    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
+    ClWorkload              workload;
+    build(workload, op_graph, workload_ctx);
+
+    ClCompositeOperator op;
+    op.configure(cl_compile_ctx, workload);
+
+    // Construct tensors
+    CLTensor t_input{};
+    CLTensor t_dst{};
+
+    // Init tensors
+    t_input.allocator()->init(t_input_info);
+    t_dst.allocator()->init(t_dst_info);
+
+    // Allocate and fill tensors
+    t_input.allocator()->allocate();
+    t_dst.allocator()->allocate();
+    fill<float>(CLAccessor(t_input), 0, library.get());
+    // "Pack" tensors
+    OpTensorBinding bp_tensors({ { op_t_input, &t_input },
+        { op_t_dst, &t_dst }
+    });
+
+    // Populate prepare and run pack-maps (including allocating aux tensors)
+    ClAuxTensorData aux_tensor_data{};
+    TensorPackMap   prepare_pack_map{};
+    TensorPackMap   run_pack_map{};
+    bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);
+
+    op.prepare(prepare_pack_map);
+    op.run(run_pack_map);
+    RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+    validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32);
+}
+
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
index fe8d23ef15..3a8b7c8ce8 100644
--- a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
+++ b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
@@ -77,8 +77,8 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL)
     auto t_acc_info   = TensorInfo(); // Intermediate tensor for cond3
     auto t_dst_info   = TensorInfo();
 
-    Conv2dDescriptor conv2d_desc{};
-    AddDescriptor    add_desc{};
+    Conv2dDescriptor      conv2d_desc{};
+    ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD };
 
     // Create reference
     SimpleTensor<float> ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
@@ -119,7 +119,7 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL)
     auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
     force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);
 
-    add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
 
     const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
     ClWorkload              workload;
@@ -180,8 +180,8 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL)
     auto t_acc_info    = TensorInfo(t_dst_shape, 1, data_type, data_layout);
     auto t_dst_info    = TensorInfo(t_dst_shape, 1, data_type, data_layout);
 
-    Conv2dDescriptor conv2d_desc{};
-    AddDescriptor    add_desc{};
+    Conv2dDescriptor      conv2d_desc{};
+    ElementwiseDescriptor add_desc{};
 
     OperatorGraph op_graph;
 
@@ -192,7 +192,7 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL)
     const auto op_t_dst       = add_tensor(op_graph, t_dst_info);
 
     auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
-    add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
     force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);
 
     const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
@@ -290,7 +290,7 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
     auto t_dst_info    = TensorInfo();
 
     OperatorGraph op_graph;
-    const auto    add_desc = AddDescriptor{};
+    const auto    add_desc = ElementwiseDescriptor{};
 
     const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
     const auto op_t_l0_rhs = add_tensor(op_graph, t_l0_rhs_info);
@@ -300,9 +300,9 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
     const auto op_t_l1_dst = add_tensor(op_graph, t_l1_dst_info); // temp accumulator; TensorInfo to be inferred
     const auto op_t_dst    = add_tensor(op_graph, t_dst_info);
 
-    add_op_elementwise_add(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst);
-    add_op_elementwise_add(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst);
-    add_op_elementwise_add(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst);
+    add_op_elementwise_op(op_graph,
add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst);
 
     const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
     ClWorkload              workload;
@@ -334,7 +334,7 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
     OperatorGraph op_graph;
 
     const auto conv2d_desc = Conv2dDescriptor{};
-    const auto add_desc    = AddDescriptor{};
+    const auto add_desc    = ElementwiseDescriptor{};
 
     const auto op_t_l0_0_input  = add_tensor(op_graph, t_l0_0_input_info);
     const auto op_t_l0_0_weight = add_tensor(op_graph, t_l0_0_weight_info);
@@ -345,8 +345,8 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
     const auto op_t_dst         = add_tensor(op_graph, t_dst_info);
 
     add_op_conv2d(op_graph, conv2d_desc, op_t_l0_0_input, op_t_l0_0_weight, op_t_l0_0_dst);
-    add_op_elementwise_add(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst);
-    add_op_elementwise_add(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst);
+    add_op_elementwise_op(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst);
 
     const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
     ClWorkload              workload;
@@ -374,7 +374,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL)
     OperatorGraph op_graph;
 
     const auto conv2d_desc = Conv2dDescriptor{};
-    const auto add_desc    = AddDescriptor{};
+    const auto add_desc    = ElementwiseDescriptor{};
 
     const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
     const auto op_t_l1_lhs = add_tensor(op_graph, t_l1_lhs_info);
@@ -382,7 +382,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL)
     const auto op_t_state1 = add_tensor(op_graph, state1_info);
 
     add_op_conv2d(op_graph, conv2d_desc, op_t_l0_lhs, op_t_state0, op_t_state1);
-    add_op_elementwise_add(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0);
+    add_op_elementwise_op(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0);
 
     const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
     ClWorkload workload;
-- 
cgit v1.2.1
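
For readers trying out the API this patch introduces, the following is a minimal usage sketch, mirroring the structure of the Floor.cpp test above. It is illustrative only: tensor shapes and variable names are invented, and ArithmeticOperation::DIV is assumed to be accepted by add_op_elementwise_op on the strength of the commit title ("Div, Floor"); the in-tree tests only exercise ADD.

// Sketch: build and run a fused Div + Floor workload with the new dynamic
// fusion operators. Uses only calls exercised by the tests in this patch;
// shapes and names are illustrative, not prescriptive.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/experimental/ClWorkload.h"
#include "arm_compute/core/experimental/OperatorGraph.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/experimental/ClCompositeOperator.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

void run_fused_div_floor()
{
    CLScheduler::get().default_reinit();
    const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();

    const auto shape    = TensorShape(32, 16);
    auto       lhs_info = TensorInfo(shape, 1, DataType::F32, DataLayout::NHWC);
    auto       rhs_info = TensorInfo(shape, 1, DataType::F32, DataLayout::NHWC);
    auto       mid_info = TensorInfo(); // intermediate; inferred at build time
    auto       dst_info = TensorInfo(); // output; inferred at build time

    OperatorGraph op_graph;
    const auto    op_lhs = add_tensor(op_graph, lhs_info);
    const auto    op_rhs = add_tensor(op_graph, rhs_info);
    const auto    op_mid = add_tensor(op_graph, mid_info);
    const auto    op_dst = add_tensor(op_graph, dst_info);

    // The descriptor now carries the arithmetic operation to perform
    add_op_elementwise_op(op_graph, ElementwiseDescriptor{ ArithmeticOperation::DIV }, op_lhs, op_rhs, op_mid);
    // Passing the intermediate OpTensor is what expresses the fusion with Floor
    add_op_floor(op_graph, FloorDescriptor{}, op_mid, op_dst);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload              workload;
    build(workload, op_graph, workload_ctx);

    ClCompositeOperator op;
    op.configure(cl_compile_ctx, workload);

    CLTensor lhs{}, rhs{}, mid{}, dst{};
    lhs.allocator()->init(lhs_info);
    rhs.allocator()->init(rhs_info);
    mid.allocator()->init(mid_info);
    dst.allocator()->init(dst_info);
    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    mid.allocator()->allocate();
    dst.allocator()->allocate();

    // Bind graph tensors to CL tensors; the intermediate is bound the same
    // way as the accumulator in the integration test above
    OpTensorBinding bp_tensors({ { op_lhs, &lhs },
        { op_rhs, &rhs },
        { op_mid, &mid },
        { op_dst, &dst }
    });

    // Populate prepare/run pack-maps, allocating any auxiliary tensors
    ClAuxTensorData aux_tensor_data{};
    TensorPackMap   prepare_pack_map{};
    TensorPackMap   run_pack_map{};
    bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);

    op.prepare(prepare_pack_map);
    op.run(run_pack_map);
}

Note the behavioural change this implies: with ElementwiseDescriptor carrying an ArithmeticOperation, two elementwise operators compare equal only when their operations match (see the updated operator== above), whereas two AddDescriptor instances previously always compared equal.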