Diffstat:
-rw-r--r--  Android.bp  3
-rw-r--r--  arm_compute/core/experimental/OperatorGraph.h  33
-rw-r--r--  examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp  12
-rw-r--r--  filelist.json  3
-rw-r--r--  src/core/CL/cl_kernels/common/gemmlowp.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nhwc/direct_convolution.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl  12
-rw-r--r--  src/core/CL/cl_kernels/tile_helpers.h  46
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp  20
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h  10
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp  2
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp (renamed from src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp)  46
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h (renamed from src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h)  13
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp  130
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h  74
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h  3
-rw-r--r--  src/core/experimental/dynamic_fusion/OperatorGraph.cpp  33
-rw-r--r--  src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h  18
-rw-r--r--  src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp  63
-rw-r--r--  src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h  30
-rw-r--r--  src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp  55
-rw-r--r--  src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h  42
-rw-r--r--  tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp  9
-rw-r--r--  tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp  135
-rw-r--r--  tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp  30
28 files changed, 723 insertions, 119 deletions
diff --git a/Android.bp b/Android.bp
index 9f6f8defdf..74c2b96c37 100644
--- a/Android.bp
+++ b/Android.bp
@@ -370,7 +370,8 @@ cc_library_static {
"src/core/Version.cpp",
"src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp",
"src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp",
- "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp",
+ "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp",
+ "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp",
"src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp",
"src/core/experimental/dynamic_fusion/OperatorGraph.cpp",
"src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp",
diff --git a/arm_compute/core/experimental/OperatorGraph.h b/arm_compute/core/experimental/OperatorGraph.h
index fd8fcd5c47..cab83c7f8b 100644
--- a/arm_compute/core/experimental/OperatorGraph.h
+++ b/arm_compute/core/experimental/OperatorGraph.h
@@ -176,17 +176,18 @@ Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTen
*/
void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method);
-/** Descriptor for Addition operation
+/** Descriptor for Elementwise binary operation
*
*/
-struct AddDescriptor
+struct ElementwiseDescriptor
{
/* TOSA compliant attribute parameters start */
/* TOSA compliant attribute parameters end */
/* Non-TOSA compliant attribute parameters start */
+ ArithmeticOperation op;
/* Non-TOSA compliant attribute parameters end */
};
-/** Add op Add to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s
+/** Add op Elementwise to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s
*
* @param[in,out] graph OperatorGraph where the operator is added to
* @param[in] desc Operator descriptor
@@ -196,12 +197,34 @@ struct AddDescriptor
*
* @return Operator
*/
-Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst);
+Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst);
+
+/** Descriptor for Floor operation
+ *
+ */
+struct FloorDescriptor
+{
+ /* TOSA compliant attribute parameters start */
+ /* TOSA compliant attribute parameters end */
+ /* Non-TOSA compliant attribute parameters start */
+ /* Non-TOSA compliant attribute parameters end */
+};
+/** Add op Floor to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s
+ *
+ * @param[in,out] graph OperatorGraph where the operator is added to
+ * @param[in] desc Operator descriptor
+ * @param[in] src Source OpTensor
+ * @param[in] dst Destination OpTensor
+ *
+ * @return Operator
+ */
+Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst);
bool operator==(const OpTensor &t0, const OpTensor &t1);
bool operator==(const Padding2D &pad0, const Padding2D &pad1);
bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1);
-bool operator==(const AddDescriptor &, const AddDescriptor &);
+bool operator==(const ElementwiseDescriptor &, const ElementwiseDescriptor &);
+bool operator==(const FloorDescriptor &, const FloorDescriptor &);
} // namespace dynamic_fusion
} // namespace experimental
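
Note: the two new entry points above compose at the graph level. A minimal host-side sketch, assuming tensor infos set up as in the example below (the tensor names here are illustrative, not from the patch):

    OperatorGraph op_graph;
    const auto op_t_lhs = add_tensor(op_graph, t_lhs_info);
    const auto op_t_rhs = add_tensor(op_graph, t_rhs_info);
    const auto op_t_tmp = add_tensor(op_graph, t_tmp_info); // intermediate tensor, describes fusion
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    ElementwiseDescriptor div_desc{ ArithmeticOperation::DIV }; // op now selects ADD or DIV
    add_op_elementwise_op(op_graph, div_desc, op_t_lhs, op_t_rhs, op_t_tmp);

    FloorDescriptor floor_desc{}; // no attribute parameters yet
    add_op_floor(op_graph, floor_desc, op_t_tmp, op_t_dst);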
diff --git a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
index 285509b586..8aee67e8a4 100644
--- a/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
+++ b/examples/dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp
@@ -218,12 +218,12 @@ public:
/// @snippet dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp Add Elementwise Add Operator
// [Add Elementwise Add Operator]
- auto t_l1_addend_info = TensorInfo(t_l1_addend_shape, 1, data_type, data_layout);
- auto t_dst_info = TensorInfo();
- const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info);
- const auto op_t_dst = add_tensor(op_graph, t_dst_info);
- AddDescriptor add_desc{};
- add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
+ auto t_l1_addend_info = TensorInfo(t_l1_addend_shape, 1, data_type, data_layout);
+ auto t_dst_info = TensorInfo();
+ const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info);
+ const auto op_t_dst = add_tensor(op_graph, t_dst_info);
+ ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD };
+ add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
// [Add Elementwise Add Operator]
/// @page example_dynamic_fusion_cl_conv2d_elementwise_add
diff --git a/filelist.json b/filelist.json
index 681cacdf02..e22ec1d18f 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2097,7 +2097,8 @@
"dynamic_fusion": [
"src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp",
"src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp",
- "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp",
+ "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp",
+ "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp",
"src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp",
"src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp",
diff --git a/src/core/CL/cl_kernels/common/gemmlowp.cl b/src/core/CL/cl_kernels/common/gemmlowp.cl
index 9fad66df68..53ce296948 100644
--- a/src/core/CL/cl_kernels/common/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/common/gemmlowp.cl
@@ -703,7 +703,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t
a_offset_s32[0].v *= A_OFFSET;
- T_ADD_BROADCAST_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32);
+ T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32);
#endif // defined(A_OFFSET)
#if defined(B_OFFSET)
@@ -728,7 +728,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t
T_LOAD(int, 1, N0, BUFFER, biases, xo, 0, 1, 0, bias);
- T_ADD_BROADCAST_X(int, M0, N0, offset_s32, bias, offset_s32);
+ T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, bias, offset_s32);
#endif // defined(ADD_BIAS)
LOOP_UNROLLING(int, i, 0, 1, M0,
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index f1b422a68f..d34e24b436 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -233,7 +233,7 @@ __kernel void direct_convolution_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout, 0, 1, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
index 587f3984ab..807b990e82 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -254,7 +254,7 @@ __kernel void direct_convolution3d_ndhwc(
}
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 4f57a81e7b..b24a6ae85f 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -175,7 +175,7 @@ __kernel void dwc_native_fp_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, (cout * DEPTH_MULTIPLIER) + d, 0, 0, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
T_ACTIVATION(ACC_DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, c, c);
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
index ec2593af71..263a23ef28 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -236,7 +236,7 @@ __kernel void dwc_native_quantized_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout * DEPTH_MULTIPLIER + d, 0, 0, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
T_LOAD_MULTIPLIERS_SHIFT(QUANTIZATION_TYPE);
diff --git a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
index bab2ee850c..0883cd99c8 100644
--- a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
+++ b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
@@ -111,7 +111,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
- T_ADD_BROADCAST_X(DATA_TYPE, 2, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 2, N0, out, b, out);
#endif // defined(HAS_BIAS)
T_ACTIVATION(DATA_TYPE, 2, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out);
@@ -177,7 +177,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // defined(HAS_BIAS)
T_ACTIVATION(DATA_TYPE, 4, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out);
@@ -287,7 +287,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -374,7 +374,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -488,7 +488,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -586,7 +586,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 69898e1bb4..0ce343e3ec 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -970,6 +970,9 @@
#define ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
#define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
+#define T_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL))
+#define T_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL))
+
/** Element-wise activation for quantized types
*
* @note Performs: activation(LHS) = DST
@@ -1014,11 +1017,15 @@
}) \
})
-/** Element-wise addition with RHS broadcasted (RHS has the X dimension only)
+#define T_ELTWISE_BROADCAST_ADD_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
+/** Element-wise operation with RHS broadcasted (RHS has the X dimension only)
*
- * @note Performs: LHS + RHS[broadcasted] = DST
+ * @note Performs: LHS OP RHS[broadcasted] = DST
* @note Both tiles must have same data type
*
+ * @param[in] T_ELWISE_OP Elementwise operator to perform
* @param[in] DST_DATA_TYPE DST data type
* @param[in] M0 Number of LHS rows
* @param[in] N0 Number of LHS columns
@@ -1026,19 +1033,23 @@
* @param[in] rhs RHS tile
* @param[out] dst DST tile
*/
-#define T_ADD_BROADCAST_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+#define T_ELTWISE_BROADCAST_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
- dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+ dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
-/** Element-wise addition between two tiles (LHS and RHS)
+#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
+/** Element-wise operation between two tiles (LHS and RHS)
*
- * @note Performs: LHS + RHS = DST
+ * @note Performs: LHS OP RHS = DST
* @note Both tiles must have same data type
*
+ * @param[in] T_ELWISE_OP Elementwise operator to perform
* @param[in] DST_DATA_TYPE DST data type
* @param[in] M0 Number of LHS rows
* @param[in] N0 Number of LHS columns
@@ -1046,11 +1057,30 @@
* @param[in] rhs RHS tile
* @param[out] dst DST tile
*/
-#define T_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+#define T_ELTWISE(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _m0, 0, 1, M0, \
+ { \
+ dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
+ }) \
+ })
+
+/** Floor operation on a tile
+ *
+ * @note Performs: floor(SRC) = DST
+ * @note Both tiles must have same data type
+ *
+ * @param[in] DST_DATA_TYPE DST data type
+ * @param[in] M0 Number of SRC rows
+ * @param[in] N0 Number of SRC columns
+ * @param[in] src LHS tile
+ * @param[out] dst DST tile
+ */
+#define T_FLOOR(DST_DATA_TYPE, M0, N0, src, dst) \
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
- dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+ dst[_m0].v = floor(CONVERT(src[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
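
Note: T_ELTWISE_BROADCAST_X and T_ELTWISE are now parameterised on a scalar op macro (T_ADD, T_DIV), and the old T_ADD_BROADCAST_X / T_ADD call sites become thin aliases. A hand-expanded sketch for M0 = 2, N0 = 4 and float data (values chosen for illustration; VEC_DATA_TYPE(float, 4) resolves to float4):

    // T_ELTWISE_BROADCAST_ADD_X(float, 2, 4, lhs, rhs, dst) unrolls to:
    dst[0].v = (CONVERT(lhs[0].v, float4)) + (CONVERT(rhs[0].v, float4)); // rhs row 0 is
    dst[1].v = (CONVERT(lhs[1].v, float4)) + (CONVERT(rhs[0].v, float4)); // broadcast over X
    // whereas T_ELTWISE_ADD(float, 2, 4, lhs, rhs, dst) pairs rows one-to-one:
    dst[0].v = (CONVERT(lhs[0].v, float4)) + (CONVERT(rhs[0].v, float4));
    dst[1].v = (CONVERT(lhs[1].v, float4)) + (CONVERT(rhs[1].v, float4));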
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
index 0be2ba02b5..9b6daae619 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
@@ -55,18 +55,32 @@ Status add_tensor(ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info,
return Status{};
}
-Status add_kcomp_eltwise_add(ClKernelBlueprint &kernel_blueprint, const ClEltwiseAddKernelDescriptor &,
- ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
+Status add_kcomp_eltwise_op(ClKernelBlueprint &kernel_blueprint, const ClElementwiseKernelDescriptor &desc,
+ ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
{
kernel_blueprint.impl().add_component(
- std::make_unique<ClElementwiseAddKernelComponent>(
+ std::make_unique<ClElementwiseKernelComponent>(
&kernel_blueprint,
+ desc,
SharedVarLink{ src0_id, SharedVarIO::Input },
SharedVarLink{ src1_id, SharedVarIO::Input },
SharedVarLink{ dst_id, SharedVarIO::Output }));
return Status{};
}
+
+Status add_kcomp_floor(ClKernelBlueprint &kernel_blueprint, const ClFloorKernelDescriptor &,
+ ArgumentID src_id, ArgumentID &dst_id)
+{
+ kernel_blueprint.impl().add_component(
+ std::make_unique<ClFloorKernelComponent>(
+ &kernel_blueprint,
+ SharedVarLink{ src_id, SharedVarIO::Input },
+ SharedVarLink{ dst_id, SharedVarIO::Output }));
+
+ return Status{};
+}
+
Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID, ArgumentID &)
{
return Status{};
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
index 067e9737e3..463fc5e7cf 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
@@ -59,9 +59,13 @@ public:
};
///// Kernel Components /////
-/** Component: Eltwise Add */
-Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id,
- ArgumentID src1_id, ArgumentID &dst_id);
+/** Component: Eltwise Operator */
+Status add_kcomp_eltwise_op(ClKernelBlueprint &, const ClElementwiseKernelDescriptor &, ArgumentID src0_id,
+ ArgumentID src1_id, ArgumentID &dst_id);
+
+/** Component: Floor */
+Status add_kcomp_floor(ClKernelBlueprint &, const ClFloorKernelDescriptor &, ArgumentID src_id,
+ ArgumentID &dst_id);
/** Component: Activation */
Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
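
Note: at the blueprint level the new components slot into the same add_tensor / add_kcomp_* / add_kcomp_store pattern the tests use. A condensed sketch for floor, assuming tensor infos and the store stage are configured as in ClCompositeKernel.cpp further down:

    ClKernelBlueprint bp;
    ArgumentID src_id{ g_arg_placeholder }, acc_id{ g_arg_placeholder }, dst_id{ g_arg_placeholder };
    auto st = add_tensor(bp, &src_info, src_id);
    st      = add_tensor(bp, &acc_info, acc_id);
    st      = add_tensor(bp, &dst_info, dst_id);
    st      = add_kcomp_floor(bp, ClFloorKernelDescriptor{}, src_id, acc_id);
    st      = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_id, dst_id);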
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
index b63e2167b7..811cd79811 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
@@ -237,7 +237,7 @@ std::string ClDirectConvolutionKernelComponent::get_component_code() const
T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
+ T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
)_";
}
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
index 965a68f51d..24a9eee9a3 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
@@ -23,7 +23,7 @@
*/
#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
#include "arm_compute/core/Validate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -34,17 +34,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-ComponentType ClElementwiseAddKernelComponent::get_component_type() const
+ComponentType ClElementwiseKernelComponent::get_component_type() const
{
return ComponentType::Simple;
}
-std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
+std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const
{
return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
}
-Window ClElementwiseAddKernelComponent::get_window() const
+Window ClElementwiseKernelComponent::get_window() const
{
const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
@@ -64,7 +64,7 @@ Window ClElementwiseAddKernelComponent::get_window() const
return win;
}
-std::string ClElementwiseAddKernelComponent::get_component_code() const
+std::string ClElementwiseKernelComponent::get_component_code() const
{
std::string code;
const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
@@ -72,7 +72,7 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
if(is_root)
{
return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
// IN_0(LHS) {{lhs}}
// IN_1(RHS) {{rhs}}
// OUT(dst, accum) {{dst}}
@@ -87,19 +87,19 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile);
#if defined(IS_BROADCAST)
- T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+ T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
#else // !defined(IS_BROADCAST)
- T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+ T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
#endif // defined(IS_BROADCAST)
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
)_";
}
else
{
return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
// IN_0/Out(Accumulator) {{acc}}
// IN_1(Addend) {{addend}}
@@ -110,17 +110,17 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile);
#if defined(IS_BROADCAST)
- T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+ T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
#else // !defined(IS_BROADCAST)
- T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+ T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
#endif // defined(IS_BROADCAST)
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
)_";
}
}
-CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
+CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const
{
const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
@@ -137,7 +137,7 @@ CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
return build_opts;
}
-std::string ClElementwiseAddKernelComponent::generate_config_id() const
+std::string ClElementwiseKernelComponent::generate_config_id() const
{
auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
std::string config_id{};
@@ -151,7 +151,7 @@ std::string ClElementwiseAddKernelComponent::generate_config_id() const
return config_id;
}
-void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
+void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
{
const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
@@ -162,7 +162,7 @@ void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtabl
}
}
-ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
+ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
{
TagLUT lut{};
const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
@@ -199,6 +199,18 @@ ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag
// Local build options
lut["meta_kernel_id"] = id();
lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
+
+ switch(_desc.eltwise.op)
+ {
+ case ArithmeticOperation::DIV:
+ lut["ELTWISE_OP"] = "DIV";
+ break;
+ case ArithmeticOperation::ADD:
+ lut["ELTWISE_OP"] = "ADD";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
+ }
return lut;
}
} // namespace dynamic_fusion
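
Note: the ELTWISE_OP tag added to the LUT above is what selects between the T_ELTWISE_BROADCAST_ADD_X / T_ELTWISE_BROADCAST_DIV_X (and non-broadcast) macros at code-generation time. Supporting a further arithmetic op would touch two places; a sketch, where ArithmeticOperation::SUB and the T_SUB macros are hypothetical and not part of this patch:

    // tile_helpers.h: scalar op plus its tile-level aliases (hypothetical)
    #define T_SUB(A_VAL, B_VAL) ((A_VAL) - (B_VAL))
    #define T_ELTWISE_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
    #define T_ELTWISE_BROADCAST_SUB_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)

    // ClElementwiseKernelComponent::get_tag_lut(): one more case in the switch
    case ArithmeticOperation::SUB:
        lut["ELTWISE_OP"] = "SUB";
        break;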
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
index 5f8b1569ac..91b14ffafa 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
@@ -34,11 +34,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-class ClElementwiseAddKernelComponent : public IClKernelComponent
+class ClElementwiseKernelComponent : public IClKernelComponent
{
public:
- ClElementwiseAddKernelComponent(ClKernelBlueprint *blueprint, const Link &lhs, const Link &rhs, const Link &dst)
- : IClKernelComponent(blueprint), _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
+ ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst)
+ : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
{
}
@@ -63,9 +63,10 @@ public:
}
private:
- Link _lhs{};
- Link _rhs{};
- Link _dst{};
+ ClElementwiseKernelDescriptor _desc{};
+ Link _lhs{};
+ Link _rhs{};
+ Link _dst{};
};
} // namespace dynamic_fusion
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
new file mode 100644
index 0000000000..87cc110561
--- /dev/null
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ComponentType ClFloorKernelComponent::get_component_type() const
+{
+ return ComponentType::Simple;
+}
+
+std::set<std::string> ClFloorKernelComponent::get_headers_list() const
+{
+ return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
+}
+
+Window ClFloorKernelComponent::get_window() const
+{
+ const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
+ ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info);
+ auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type());
+
+ const unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
+ Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration));
+
+ return win;
+}
+
+std::string ClFloorKernelComponent::get_component_code() const
+{
+ return R"_(
+ //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
+ // IN_0(src) {{src}}
+ // OUT(dst, accum) {{dst}}
+ TILE({{DATA_TYPE}}, M0, N0, {{dst}});
+ {
+ TILE({{DATA_TYPE}}, M0, N0, src_tile);
+
+ T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile);
+ T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}});
+ }
+
+ //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
+)_";
+}
+
+CLBuildOptions ClFloorKernelComponent::generate_build_options() const
+{
+ CLBuildOptions build_opts{};
+
+ const auto n0 = _blueprint->impl().get_execution_window().x().step();
+ const auto m0 = _blueprint->impl().get_execution_window().y().step();
+
+ const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClFloorKernelComponent::generate_config_id() const
+{
+ auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ std::string config_id{};
+ config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
+ config_id += "_";
+ config_id += support::cpp11::to_string(t_dst_info->dimension(0));
+ config_id += "_";
+ config_id += support::cpp11::to_string(t_dst_info->dimension(1));
+ config_id += "_";
+ config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
+ return config_id;
+}
+
+void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
+{
+ vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
+ vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
+}
+
+ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
+{
+ TagLUT lut{};
+ const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ // Arguments and global shared variables
+ lut["src"] = vtable.get(_src);
+ lut["dst"] = vtable.get(_dst);
+ lut["meta_kernel_id"] = id();
+ lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
+ return lut;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
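
Note: after tag substitution the component body above resolves to plain tile code. Assuming a meta kernel id of g0 and F32 tensors (both illustrative; the actual names come from the shared-variable table), the generated fragment would read roughly:

    //------------------ START KERNEL g0 FLOOR ---------------------
    TILE(float, M0, N0, g0_dst);
    {
        TILE(float, M0, N0, src_tile);
        T_LOAD(float, M0, N0, BUFFER, src, cout, mout, 1, src_stride_y, src_tile);
        T_FLOOR(float, M0, N0, src_tile, g0_dst); // per-row floor(CONVERT(...)) as defined in tile_helpers.h
    }
    //------------------ END KERNEL g0 FLOOR ---------------------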
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
new file mode 100644
index 0000000000..5463e233d4
--- /dev/null
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClFloorKernelComponent : public IClKernelComponent
+{
+public:
+ ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
+ : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
+ {
+ }
+
+ ComponentType get_component_type() const override;
+ std::set<std::string> get_headers_list() const override;
+ std::string get_component_code() const override;
+ Window get_window() const override;
+ CLBuildOptions generate_build_options() const override;
+ std::string generate_config_id() const override;
+
+ virtual std::vector<Link> get_links() const override
+ {
+ return { _src, _dst };
+ }
+
+ virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
+ virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
+
+ virtual std::string name() const override
+ {
+ return "floor_" + std::to_string(id());
+ }
+
+private:
+ Link _src{};
+ Link _dst{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
index 26e50523a9..3f99dd5553 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
@@ -27,7 +27,8 @@
#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
diff --git a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
index a335e5aada..bd88afdb47 100644
--- a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
+++ b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
@@ -198,7 +198,7 @@ void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMetho
node->set_method(method);
}
-Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
+Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
{
auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() });
check_dependency_graph_op_success(graph, id.first);
@@ -224,7 +224,36 @@ Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc,
tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get());
tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get());
tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
- graph.impl()->add_node<AddContent>(id.second, desc, tensors);
+ graph.impl()->add_node<ElementwiseContent>(id.second, desc, tensors);
+ check_multiple_roots(graph);
+
+ return op_node;
+}
+
+Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst)
+{
+ auto id = graph.impl()->graph.add_operator({ src.id() }, { dst.id() });
+ check_dependency_graph_op_success(graph, id.first);
+
+ Operator op_node(id.second);
+
+ // Infer TensorInfo
+ auto node_src = graph.impl()->tensors[src.id()]->get_tensor_info();
+ OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get();
+
+ if(node_dst->get_tensor_info()->total_size() == 0)
+ {
+ auto_init_if_empty(*(node_dst->get_tensor_info()), *node_src);
+ }
+
+ // Check execution space
+ auto dst_info = node_dst->get_tensor_info();
+ check_execution_shape(graph, *dst_info);
+
+ ITensorDescPack<OpTensorContent> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[src.id()].get());
+ tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
+ graph.impl()->add_node<FloorContent>(id.second, desc, tensors);
check_multiple_roots(graph);
return op_node;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
index a9ccf908f0..f10e97e3e9 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
@@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor
Conv2dDescriptor conv2d{};
};
-struct ClEltwiseAddKernelDescriptor
+struct ClElementwiseKernelDescriptor
{
- friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1)
+ friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1)
{
- return desc0.add == desc1.add;
+ return desc0.eltwise == desc1.eltwise;
}
- AddDescriptor add{};
+ ElementwiseDescriptor eltwise{};
};
+
+struct ClFloorKernelDescriptor
+{
+ friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1)
+ {
+ return desc0.floor == desc1.floor;
+ }
+ FloorDescriptor floor{};
+};
+
struct ClActivationKernelDescriptor
{
friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &)
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
index de58ce70ed..cab51a2ce6 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
@@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
-Status ClAddKernel::generate(ClKernelBlueprint &bp) const
+Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const
ArgumentID dst_id;
add_tensor(bp, dst->desc, dst_id, dst->id);
- add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id);
+ add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
return Status{};
}
-Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
+Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
{
// 1. Check validity
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
@@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con
return Status{};
}
-bool ClAddKernel::operator==(const ClKernel &other) const
+bool ClElementwiseKernel::operator==(const ClKernel &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
+ return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
+}
+
+Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ArgumentID src_id;
+ add_tensor(bp, src->desc, src_id, src->id);
+ ArgumentID dst_id;
+ add_tensor(bp, dst->desc, dst_id, dst->id);
+
+ add_kcomp_floor(bp, desc, src_id, dst_id);
+ return Status{};
+}
+
+Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ // 1. Check validity
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+
+ // dst shape is correct
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
+
+ // 2. Check support level
+
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
+
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+bool ClFloorKernel::operator==(const ClKernel &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
@@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
}
return kernels;
}
+
std::vector<ClKernel *> traverse(ClKernelGraph &graph)
{
std::vector<ClKernel *> kernels;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
index 54e01ea850..c3580cfaca 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
@@ -139,16 +139,16 @@ public:
ClDirectConv2dKernelDescriptor desc{};
};
-struct ClAddKernel : public ClKernel
+struct ClElementwiseKernel : public ClKernel
{
public:
Complexity complexity() const override
{
return Complexity::Simple;
}
- ClAddKernel() = default;
- ~ClAddKernel() override = default;
- ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ ClElementwiseKernel() = default;
+ ~ClElementwiseKernel() override = default;
+ ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
: ClKernel{ graph, id, config, tensors }, desc{ desc }
{
}
@@ -156,7 +156,27 @@ public:
bool operator==(const ClKernel &other) const override;
Status generate(ClKernelBlueprint &bp) const override;
- ClEltwiseAddKernelDescriptor desc{};
+ ClElementwiseKernelDescriptor desc{};
+};
+
+struct ClFloorKernel : public ClKernel
+{
+public:
+ Complexity complexity() const override
+ {
+ return Complexity::Simple;
+ }
+ ClFloorKernel() = default;
+ ~ClFloorKernel() override = default;
+ ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ : ClKernel{ graph, id, config, tensors }, desc{ desc }
+ {
+ }
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+ bool operator==(const ClKernel &other) const override;
+ Status generate(ClKernelBlueprint &bp) const override;
+
+ ClFloorKernelDescriptor desc{};
};
struct ClKernelGraph
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
index f971196729..274a2517bb 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
@@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1
return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation);
}
-bool operator==(const AddDescriptor &, const AddDescriptor &)
+bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
{
- return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same
+    return ed0.op == ed1.op; // Compare the arithmetic operations of the two ElementwiseDescriptor objects
+}
+
+bool operator==(const FloorDescriptor &, const FloorDescriptor &)
+{
+ return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same
}
bool Conv2dContent::operator==(const OperatorContent &other) const
@@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const
return desc == converted.desc;
}
-bool AddContent::operator==(const OperatorContent &other) const
+bool ElementwiseContent::operator==(const OperatorContent &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
+ return desc == converted.desc;
+}
+
+bool FloorContent::operator==(const OperatorContent &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other);
return desc == converted.desc;
}
@@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
return Status{};
}
-Status AddContent::translate(ClKernelGraph &kernel_graph) const
+Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const
DependencyGraph::Id add_id;
ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
- st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc);
+ st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc);
ARM_COMPUTE_RETURN_ON_ERROR(st);
- st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id);
+ st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id);
ARM_COMPUTE_RETURN_ON_ERROR(st);
ARM_COMPUTE_UNUSED(add_id);
return Status{};
}
+Status FloorContent::translate(ClKernelGraph &kernel_graph) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+
+ ITensorDescPack<ClKernelTensor> tensors;
+
+ DependencyGraph::Id src_id;
+ auto st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
+
+ DependencyGraph::Id dst_id;
+ st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
+
+ DependencyGraph::Id add_id;
+ ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
+
+ st = ClFloorKernel::validate(src->desc, dst->desc);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ return Status{};
+}
+
std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
{
std::vector<const OperatorContent *> ops;
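
Note: both translate() overrides feed the same second stage: each OperatorContent in the operator graph is visited and emits one ClKernel into the ClKernelGraph. A paraphrased sketch of that driving loop (traverse and translate are the names from this file; the surrounding workload code is elided):

    ClKernelGraph kernel_graph;
    for(const OperatorContent *op : traverse(op_graph_impl))
    {
        const Status st = op->translate(kernel_graph); // Conv2dContent, ElementwiseContent or FloorContent
        ARM_COMPUTE_RETURN_ON_ERROR(st);
    }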
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
index 2786d610e1..b303cdb9fc 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
@@ -157,19 +157,19 @@ private:
Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
};
-class AddContent : public OperatorContent
+class ElementwiseContent : public OperatorContent
{
public:
- AddContent() = default;
- AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ ElementwiseContent() = default;
+ ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
: OperatorContent(graph, id, tensors), desc(desc)
{
}
- ~AddContent() = default;
- AddContent(const AddContent &) = default;
- AddContent &operator=(const AddContent &) = default;
- AddContent(AddContent &&) = default;
- AddContent &operator=(AddContent &&) = default;
+ ~ElementwiseContent() = default;
+ ElementwiseContent(const ElementwiseContent &) = default;
+ ElementwiseContent &operator=(const ElementwiseContent &) = default;
+ ElementwiseContent(ElementwiseContent &&) = default;
+ ElementwiseContent &operator=(ElementwiseContent &&) = default;
bool operator==(const OperatorContent &other) const override;
OperatorComplexity complexity() const override
{
@@ -178,7 +178,31 @@ public:
Status translate(ClKernelGraph &kernel_graph) const override;
private:
- AddDescriptor desc{};
+ ElementwiseDescriptor desc{};
+};
+
+class FloorContent : public OperatorContent
+{
+public:
+ FloorContent() = default;
+ FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ : OperatorContent(graph, id, tensors), desc(desc)
+ {
+ }
+ ~FloorContent() = default;
+ FloorContent(const FloorContent &) = default;
+ FloorContent &operator=(const FloorContent &) = default;
+ FloorContent(FloorContent &&) = default;
+ FloorContent &operator=(FloorContent &&) = default;
+ bool operator==(const OperatorContent &other) const override;
+ OperatorComplexity complexity() const override
+ {
+ return OperatorComplexity::Simple;
+ }
+ Status translate(ClKernelGraph &kernel_graph) const override;
+
+private:
+ FloorDescriptor desc{};
};
struct OperatorGraph::Implementation
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
index 96a845c36e..3ffbc077c6 100644
--- a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
+++ b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
@@ -74,8 +74,9 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
ClExecutionDescriptor exec_desc{};
Status st{};
- const auto data_type = DataType::F32;
- const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
+ const auto data_type = DataType::F32;
+ const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
+ const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD };
const auto width = 7U;
const auto height = 6U;
@@ -99,7 +100,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ? 2U : 4U) : 1U;
const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info };
- const ClEltwiseAddKernelDescriptor eltwise_add_desc{};
+ const ClElementwiseKernelDescriptor eltwise_add_desc{ eltwise_info };
const TileDescriptor store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT };
ArgumentID src_id{ g_arg_placeholder };
@@ -119,7 +120,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
st = add_tensor(bp, &dst_info, dst_id);
st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id);
- st = add_kcomp_eltwise_add(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
+ st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id);
exec_desc.skip_sliding_window = true;
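[Between-file note] At the kernel-component level the same generalization applies: ClEltwiseAddKernelDescriptor becomes ClElementwiseKernelDescriptor and now wraps an ElementwiseDescriptor, so the operation is threaded through the descriptor instead of being baked into the component name. Condensed, the updated call sequence in the test above is (a sketch; variable names as in the hunks):

    const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD };
    const ClElementwiseKernelDescriptor eltwise_add_desc{ eltwise_info };
    // Renamed from add_kcomp_eltwise_add; now operation-agnostic.
    st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);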
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp
new file mode 100644
index 0000000000..2b8f69e5e7
--- /dev/null
+++ b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#include "arm_compute/core/TensorInfo.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/experimental/ClWorkload.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/experimental/ClCompositeOperator.h"
+#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/validation/reference/Floor.h"
+#include "tests/validation/reference/Permute.h"
+
+#ifdef ARM_COMPUTE_ASSERTS_ENABLED
+#include "tests/SimpleTensorPrinter.h"
+#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
+
+using namespace arm_compute::experimental::dynamic_fusion;
+using namespace arm_compute::test::validation::utils;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_CASE(Operator_Floor_1_F32, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out = floor(input)
+ */
+ const auto data_type = DataType::F32;
+ const auto data_layout = DataLayout::NHWC;
+ const auto t_shape = TensorShape(32, 16);
+ auto t_input_info = TensorInfo(t_shape, 1, data_type, data_layout);
+ auto t_dst_info = TensorInfo();
+
+ FloorDescriptor floor_desc{};
+
+ // Create reference
+ SimpleTensor<float> ref_t_input{ t_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
+
+ // Fill reference
+ fill<float>(ref_t_input, 0, library.get());
+
+ auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U));
+ auto t_dst_shape_nchw = t_shape;
+ permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+ auto ref_t_dst_nchw = reference::floor_layer(ref_t_input_nchw);
+ const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));
+
+ CLScheduler::get().default_reinit();
+ const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ OperatorGraph op_graph;
+
+ const auto op_t_input = add_tensor(op_graph, t_input_info);
+ const auto op_t_dst = add_tensor(op_graph, t_dst_info);
+
+ add_op_floor(op_graph, floor_desc, op_t_input, op_t_dst);
+
+ const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
+ ClWorkload workload;
+ build(workload, op_graph, workload_ctx);
+
+ ClCompositeOperator op;
+ op.configure(cl_compile_ctx, workload);
+
+ // Construct tensors
+ CLTensor t_input{};
+ CLTensor t_dst{};
+
+ // Init tensors
+ t_input.allocator()->init(t_input_info);
+ t_dst.allocator()->init(t_dst_info);
+
+ // Allocate and fill tensors
+ t_input.allocator()->allocate();
+ t_dst.allocator()->allocate();
+ fill<float>(CLAccessor(t_input), 0, library.get());
+ // "Pack" tensors
+ OpTensorBinding bp_tensors({ { op_t_input, &t_input },
+ { op_t_dst, &t_dst }
+ });
+
+ // Populate prepare and run pack-maps (including allocating aux tensors)
+ ClAuxTensorData aux_tensor_data{};
+ TensorPackMap prepare_pack_map{};
+ TensorPackMap run_pack_map{};
+ bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);
+
+ op.prepare(prepare_pack_map);
+ op.run(run_pack_map);
+ RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+ validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32);
+}
+
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
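[Between-file note] The new test also doubles as a usage reference for the floor operator. Distilled, the run-time sequence after build() is (a sketch; allocation, filling and validation elided, all names taken from the test above):

    ClCompositeOperator op;
    op.configure(cl_compile_ctx, workload); // compiles the fused kernel(s)

    // Bind user tensors to graph tensors, then derive the pack-maps
    // (this also sizes any auxiliary tensors the workload needs).
    OpTensorBinding bp_tensors({ { op_t_input, &t_input }, { op_t_dst, &t_dst } });
    ClAuxTensorData aux_tensor_data{};
    TensorPackMap prepare_pack_map{};
    TensorPackMap run_pack_map{};
    bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);

    op.prepare(prepare_pack_map); // one-off preparation work
    op.run(run_pack_map);         // execute the fused workload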
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
index fe8d23ef15..3a8b7c8ce8 100644
--- a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
+++ b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp
@@ -77,8 +77,8 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL)
auto t_acc_info = TensorInfo(); // Intermediate tensor for cond3
auto t_dst_info = TensorInfo();
- Conv2dDescriptor conv2d_desc{};
- AddDescriptor add_desc{};
+ Conv2dDescriptor conv2d_desc{};
+ ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD };
// Create reference
SimpleTensor<float> ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
@@ -119,7 +119,7 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL)
auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);
- add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
ClWorkload workload;
@@ -180,8 +180,8 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL)
auto t_acc_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
- Conv2dDescriptor conv2d_desc{};
- AddDescriptor add_desc{};
+ Conv2dDescriptor conv2d_desc{};
+ ElementwiseDescriptor add_desc{};
OperatorGraph op_graph;
@@ -192,7 +192,7 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL)
const auto op_t_dst = add_tensor(op_graph, t_dst_info);
auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
- add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);
const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
@@ -290,7 +290,7 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
auto t_dst_info = TensorInfo();
OperatorGraph op_graph;
- const auto add_desc = AddDescriptor{};
+ const auto add_desc = ElementwiseDescriptor{};
const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
const auto op_t_l0_rhs = add_tensor(op_graph, t_l0_rhs_info);
@@ -300,9 +300,9 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
const auto op_t_l1_dst = add_tensor(op_graph, t_l1_dst_info); // temp accumulator; TensorInfo to be inferred
const auto op_t_dst = add_tensor(op_graph, t_dst_info);
- add_op_elementwise_add(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst);
- add_op_elementwise_add(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst);
- add_op_elementwise_add(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst);
const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
ClWorkload workload;
@@ -334,7 +334,7 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
OperatorGraph op_graph;
const auto conv2d_desc = Conv2dDescriptor{};
- const auto add_desc = AddDescriptor{};
+ const auto add_desc = ElementwiseDescriptor{};
const auto op_t_l0_0_input = add_tensor(op_graph, t_l0_0_input_info);
const auto op_t_l0_0_weight = add_tensor(op_graph, t_l0_0_weight_info);
@@ -345,8 +345,8 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
const auto op_t_dst = add_tensor(op_graph, t_dst_info);
add_op_conv2d(op_graph, conv2d_desc, op_t_l0_0_input, op_t_l0_0_weight, op_t_l0_0_dst);
- add_op_elementwise_add(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst);
- add_op_elementwise_add(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst);
const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
ClWorkload workload;
@@ -374,7 +374,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL)
OperatorGraph op_graph;
const auto conv2d_desc = Conv2dDescriptor{};
- const auto add_desc = AddDescriptor{};
+ const auto add_desc = ElementwiseDescriptor{};
const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
const auto op_t_l1_lhs = add_tensor(op_graph, t_l1_lhs_info);
@@ -382,7 +382,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL)
const auto op_t_state1 = add_tensor(op_graph, state1_info);
add_op_conv2d(op_graph, conv2d_desc, op_t_l0_lhs, op_t_state0, op_t_state1);
- add_op_elementwise_add(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0);
+ add_op_elementwise_op(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0);
const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
ClWorkload workload;
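[Closing note] The pattern across these test hunks is mechanical: every AddDescriptor becomes an ElementwiseDescriptor, and every add_op_elementwise_add call becomes add_op_elementwise_op, with the operation (here always ADD) moved into the descriptor. The payoff is that further binary operations need no new graph entry point; a sketch of what that would look like (assuming other arm_compute ArithmeticOperation enumerators, e.g. SUB, are accepted by the generalized path — this patch only exercises ADD):

    // Hypothetical: same entry point, different operation in the descriptor.
    ElementwiseDescriptor sub_desc{ ArithmeticOperation::SUB }; // assumption: SUB supported
    add_op_elementwise_op(op_graph, sub_desc, op_t_lhs, op_t_rhs, op_t_dst);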