aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2022-06-15 19:02:28 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2022-06-27 14:05:05 +0000
commitb1fcefddf3f59219a9d7930d607175b7e6c39347 (patch)
tree34e95efded15194b3c8abe4ba3da308c3259301d /src
parent41eb2d92c89274200d59ff97653e2bd66819b310 (diff)
downloadComputeLibrary-b1fcefddf3f59219a9d7930d607175b7e6c39347.tar.gz
Implement new Elementwise Dynamic Fusion Operators: Div, Floor
Resolves: COMPMID-5355 Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/common/gemmlowp.cl4
-rw-r--r--src/core/CL/cl_kernels/nhwc/direct_convolution.cl4
-rw-r--r--src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl4
-rw-r--r--src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl4
-rw-r--r--src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl4
-rw-r--r--src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl12
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h46
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp20
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h10
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp2
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp (renamed from src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp)46
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h (renamed from src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h)13
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp130
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h74
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h3
-rw-r--r--src/core/experimental/dynamic_fusion/OperatorGraph.cpp33
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h18
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp63
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h30
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp55
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h42
21 files changed, 530 insertions, 87 deletions
diff --git a/src/core/CL/cl_kernels/common/gemmlowp.cl b/src/core/CL/cl_kernels/common/gemmlowp.cl
index 9fad66df68..53ce296948 100644
--- a/src/core/CL/cl_kernels/common/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/common/gemmlowp.cl
@@ -703,7 +703,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t
a_offset_s32[0].v *= A_OFFSET;
- T_ADD_BROADCAST_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32);
+ T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, a_offset_s32, offset_s32);
#endif // defined(A_OFFSET)
#if defined(B_OFFSET)
@@ -728,7 +728,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t
T_LOAD(int, 1, N0, BUFFER, biases, xo, 0, 1, 0, bias);
- T_ADD_BROADCAST_X(int, M0, N0, offset_s32, bias, offset_s32);
+ T_ELTWISE_BROADCAST_ADD_X(int, M0, N0, offset_s32, bias, offset_s32);
#endif // defined(ADD_BIAS)
LOOP_UNROLLING(int, i, 0, 1, M0,
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index f1b422a68f..d34e24b436 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -233,7 +233,7 @@ __kernel void direct_convolution_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout, 0, 1, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
index 587f3984ab..807b990e82 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -254,7 +254,7 @@ __kernel void direct_convolution3d_ndhwc(
}
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 4f57a81e7b..b24a6ae85f 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -175,7 +175,7 @@ __kernel void dwc_native_fp_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, (cout * DEPTH_MULTIPLIER) + d, 0, 0, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
T_ACTIVATION(ACC_DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, c, c);
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
index ec2593af71..263a23ef28 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -236,7 +236,7 @@ __kernel void dwc_native_quantized_nhwc(
T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout * DEPTH_MULTIPLIER + d, 0, 0, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
+ T_ELTWISE_BROADCAST_ADD_X(ACC_DATA_TYPE, M0, N0, c, bias0, c);
#endif // HAS_BIAS
T_LOAD_MULTIPLIERS_SHIFT(QUANTIZATION_TYPE);
diff --git a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
index bab2ee850c..0883cd99c8 100644
--- a/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
+++ b/src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl
@@ -111,7 +111,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
- T_ADD_BROADCAST_X(DATA_TYPE, 2, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 2, N0, out, b, out);
#endif // defined(HAS_BIAS)
T_ACTIVATION(DATA_TYPE, 2, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out);
@@ -177,7 +177,7 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // defined(HAS_BIAS)
T_ACTIVATION(DATA_TYPE, 4, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out);
@@ -287,7 +287,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -374,7 +374,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -488,7 +488,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 4, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 4, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
@@ -586,7 +586,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
T_LOAD(DATA_TYPE, 1, N0, BUFFER, bias, cout, 0, 1, 0, b);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X(DATA_TYPE, 16, N0, out, b, out);
+ T_ELTWISE_BROADCAST_ADD_X(DATA_TYPE, 16, N0, out, b, out);
#endif // HAS_BIAS
int x_out = (mout % NUM_TILES_X) * OUTPUT_TILE_W;
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 69898e1bb4..0ce343e3ec 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -970,6 +970,9 @@
#define ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) op##_op_quantized(DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
#define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
+#define T_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL))
+#define T_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL))
+
/** Element-wise activation for quantized types
*
* @note Performs: activation(LHS) = DST
@@ -1014,11 +1017,15 @@
}) \
})
-/** Element-wise addition with RHS broadcasted (RHS has the X dimension only)
+#define T_ELTWISE_BROADCAST_ADD_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
+/** Element-wise operation with RHS broadcasted (RHS has the X dimension only)
*
- * @note Performs: LHS + RHS[broadcasted] = DST
+ * @note Performs: LHS OP RHS[broadcasted] = DST
* @note Both tiles must have same data type
*
+ * @param[in] T_ELWISE_OP Elementwise operator to perform
* @param[in] DST_DATA_TYPE DST data type
* @param[in] M0 Number of LHS rows
* @param[in] N0 Number of LHS columns
@@ -1026,19 +1033,23 @@
* @param[in] rhs RHS tile
* @param[out] dst DST tile
*/
-#define T_ADD_BROADCAST_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+#define T_ELTWISE_BROADCAST_X(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
- dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+ dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
-/** Element-wise addition between two tiles (LHS and RHS)
+#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(T_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
+/** Element-wise operation between two tiles (LHS and RHS)
*
- * @note Performs: LHS + RHS = DST
+ * @note Performs: LHS OP RHS = DST
* @note Both tiles must have same data type
*
+ * @param[in] T_ELWISE_OP Elementwise operator to perform
* @param[in] DST_DATA_TYPE DST data type
* @param[in] M0 Number of LHS rows
* @param[in] N0 Number of LHS columns
@@ -1046,11 +1057,30 @@
* @param[in] rhs RHS tile
* @param[out] dst DST tile
*/
-#define T_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+#define T_ELTWISE(T_ELWISE_OP, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _m0, 0, 1, M0, \
+ { \
+ dst[_m0].v = T_ELWISE_OP(CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)), CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
+ }) \
+ })
+
+/** Floor operation on a tile
+ *
+ * @note Performs: floor(SRC) = DST
+ * @note Both tiles must have same data type
+ *
+ * @param[in] DST_DATA_TYPE DST data type
+ * @param[in] M0 Number of SRC rows
+ * @param[in] N0 Number of SRC columns
+ * @param[in] src LHS tile
+ * @param[out] dst DST tile
+ */
+#define T_FLOOR(DST_DATA_TYPE, M0, N0, src, dst) \
({ \
LOOP_UNROLLING(int, _m0, 0, 1, M0, \
{ \
- dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+ dst[_m0].v = floor(CONVERT(src[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0))); \
}) \
})
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
index 0be2ba02b5..9b6daae619 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
@@ -55,18 +55,32 @@ Status add_tensor(ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info,
return Status{};
}
-Status add_kcomp_eltwise_add(ClKernelBlueprint &kernel_blueprint, const ClEltwiseAddKernelDescriptor &,
- ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
+Status add_kcomp_eltwise_op(ClKernelBlueprint &kernel_blueprint, const ClElementwiseKernelDescriptor &desc,
+ ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
{
kernel_blueprint.impl().add_component(
- std::make_unique<ClElementwiseAddKernelComponent>(
+ std::make_unique<ClElementwiseKernelComponent>(
&kernel_blueprint,
+ desc,
SharedVarLink{ src0_id, SharedVarIO::Input },
SharedVarLink{ src1_id, SharedVarIO::Input },
SharedVarLink{ dst_id, SharedVarIO::Output }));
return Status{};
}
+
+Status add_kcomp_floor(ClKernelBlueprint &kernel_blueprint, const ClFloorKernelDescriptor &,
+ ArgumentID src_id, ArgumentID &dst_id)
+{
+ kernel_blueprint.impl().add_component(
+ std::make_unique<ClFloorKernelComponent>(
+ &kernel_blueprint,
+ SharedVarLink{ src_id, SharedVarIO::Input },
+ SharedVarLink{ dst_id, SharedVarIO::Output }));
+
+ return Status{};
+}
+
Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID, ArgumentID &)
{
return Status{};
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
index 067e9737e3..463fc5e7cf 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
@@ -59,9 +59,13 @@ public:
};
///// Kernel Components /////
-/** Component: Eltwise Add */
-Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id,
- ArgumentID src1_id, ArgumentID &dst_id);
+/** Component: Eltwise Operator */
+Status add_kcomp_eltwise_op(ClKernelBlueprint &, const ClElementwiseKernelDescriptor &, ArgumentID src0_id,
+ ArgumentID src1_id, ArgumentID &dst_id);
+
+/** Component: Floor */
+Status add_kcomp_floor(ClKernelBlueprint &, const ClFloorKernelDescriptor &, ArgumentID src_id,
+ ArgumentID &dst_id);
/** Component: Activation */
Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
index b63e2167b7..811cd79811 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
@@ -237,7 +237,7 @@ std::string ClDirectConvolutionKernelComponent::get_component_code() const
T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0);
// c = c + bias[broadcasted]
- T_ADD_BROADCAST_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
+ T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
)_";
}
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
index 965a68f51d..24a9eee9a3 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
@@ -23,7 +23,7 @@
*/
#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
#include "arm_compute/core/Validate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -34,17 +34,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-ComponentType ClElementwiseAddKernelComponent::get_component_type() const
+ComponentType ClElementwiseKernelComponent::get_component_type() const
{
return ComponentType::Simple;
}
-std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
+std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const
{
return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
}
-Window ClElementwiseAddKernelComponent::get_window() const
+Window ClElementwiseKernelComponent::get_window() const
{
const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
@@ -64,7 +64,7 @@ Window ClElementwiseAddKernelComponent::get_window() const
return win;
}
-std::string ClElementwiseAddKernelComponent::get_component_code() const
+std::string ClElementwiseKernelComponent::get_component_code() const
{
std::string code;
const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
@@ -72,7 +72,7 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
if(is_root)
{
return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
// IN_0(LHS) {{lhs}}
// IN_1(RHS) {{rhs}}
// OUT(dst, accum) {{dst}}
@@ -87,19 +87,19 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile);
#if defined(IS_BROADCAST)
- T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+ T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
#else // !defined(IS_BROADCAST)
- T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+ T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
#endif // defined(IS_BROADCAST)
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
)_";
}
else
{
return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
// IN_0/Out(Accumulator) {{acc}}
// IN_1(Addend) {{addend}}
@@ -110,17 +110,17 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile);
#if defined(IS_BROADCAST)
- T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+ T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
#else // !defined(IS_BROADCAST)
- T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+ T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
#endif // defined(IS_BROADCAST)
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
)_";
}
}
-CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
+CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const
{
const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
@@ -137,7 +137,7 @@ CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
return build_opts;
}
-std::string ClElementwiseAddKernelComponent::generate_config_id() const
+std::string ClElementwiseKernelComponent::generate_config_id() const
{
auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
std::string config_id{};
@@ -151,7 +151,7 @@ std::string ClElementwiseAddKernelComponent::generate_config_id() const
return config_id;
}
-void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
+void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
{
const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
@@ -162,7 +162,7 @@ void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtabl
}
}
-ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
+ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
{
TagLUT lut{};
const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
@@ -199,6 +199,18 @@ ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag
// Local build options
lut["meta_kernel_id"] = id();
lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
+
+ switch(_desc.eltwise.op)
+ {
+ case ArithmeticOperation::DIV:
+ lut["ELTWISE_OP"] = "DIV";
+ break;
+ case ArithmeticOperation::ADD:
+ lut["ELTWISE_OP"] = "ADD";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
+ }
return lut;
}
} // namespace dynamic_fusion
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
index 5f8b1569ac..91b14ffafa 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
@@ -34,11 +34,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-class ClElementwiseAddKernelComponent : public IClKernelComponent
+class ClElementwiseKernelComponent : public IClKernelComponent
{
public:
- ClElementwiseAddKernelComponent(ClKernelBlueprint *blueprint, const Link &lhs, const Link &rhs, const Link &dst)
- : IClKernelComponent(blueprint), _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
+ ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst)
+ : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
{
}
@@ -63,9 +63,10 @@ public:
}
private:
- Link _lhs{};
- Link _rhs{};
- Link _dst{};
+ ClElementwiseKernelDescriptor _desc{};
+ Link _lhs{};
+ Link _rhs{};
+ Link _dst{};
};
} // namespace dynamic_fusion
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
new file mode 100644
index 0000000000..87cc110561
--- /dev/null
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ComponentType ClFloorKernelComponent::get_component_type() const
+{
+ return ComponentType::Simple;
+}
+
+std::set<std::string> ClFloorKernelComponent::get_headers_list() const
+{
+ return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
+}
+
+Window ClFloorKernelComponent::get_window() const
+{
+ const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
+ ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info);
+ auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type());
+
+ const unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
+ Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration));
+
+ return win;
+}
+
+std::string ClFloorKernelComponent::get_component_code() const
+{
+ return R"_(
+ //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
+ // IN_0(src) {{src}}
+ // OUT(dst, accum) {{dst}}
+ TILE({{DATA_TYPE}}, M0, N0, {{dst}});
+ {
+ TILE({{DATA_TYPE}}, M0, N0, src_tile);
+
+ T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile);
+ T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}});
+ }
+
+ //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
+)_";
+}
+
+CLBuildOptions ClFloorKernelComponent::generate_build_options() const
+{
+ CLBuildOptions build_opts{};
+
+ const auto n0 = _blueprint->impl().get_execution_window().x().step();
+ const auto m0 = _blueprint->impl().get_execution_window().y().step();
+
+ const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClFloorKernelComponent::generate_config_id() const
+{
+ auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ std::string config_id{};
+ config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
+ config_id += "_";
+ config_id += support::cpp11::to_string(t_dst_info->dimension(0));
+ config_id += "_";
+ config_id += support::cpp11::to_string(t_dst_info->dimension(1));
+ config_id += "_";
+ config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
+ return config_id;
+}
+
+void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
+{
+ vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
+ vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
+}
+
+ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
+{
+ TagLUT lut{};
+ const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
+ // Arguments and global shared variables
+ lut["src"] = vtable.get(_src);
+ lut["dst"] = vtable.get(_dst);
+ lut["meta_kernel_id"] = id();
+ lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
+ return lut;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
new file mode 100644
index 0000000000..5463e233d4
--- /dev/null
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClFloorKernelComponent : public IClKernelComponent
+{
+public:
+ ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
+ : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
+ {
+ }
+
+ ComponentType get_component_type() const override;
+ std::set<std::string> get_headers_list() const override;
+ std::string get_component_code() const override;
+ Window get_window() const override;
+ CLBuildOptions generate_build_options() const override;
+ std::string generate_config_id() const override;
+
+ virtual std::vector<Link> get_links() const override
+ {
+ return { _src, _dst };
+ }
+
+ virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
+ virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
+
+ virtual std::string name() const override
+ {
+ return "floor_" + std::to_string(id());
+ }
+
+private:
+ Link _src{};
+ Link _dst{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
index 26e50523a9..3f99dd5553 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
@@ -27,7 +27,8 @@
#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
+#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
diff --git a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
index a335e5aada..bd88afdb47 100644
--- a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
+++ b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
@@ -198,7 +198,7 @@ void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMetho
node->set_method(method);
}
-Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
+Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
{
auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() });
check_dependency_graph_op_success(graph, id.first);
@@ -224,7 +224,36 @@ Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc,
tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get());
tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get());
tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
- graph.impl()->add_node<AddContent>(id.second, desc, tensors);
+ graph.impl()->add_node<ElementwiseContent>(id.second, desc, tensors);
+ check_multiple_roots(graph);
+
+ return op_node;
+}
+
+Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst)
+{
+ auto id = graph.impl()->graph.add_operator({ src.id() }, { dst.id() });
+ check_dependency_graph_op_success(graph, id.first);
+
+ Operator op_node(id.second);
+
+ // Infer TensorInfo
+ auto node_src = graph.impl()->tensors[src.id()]->get_tensor_info();
+ OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get();
+
+ if(node_dst->get_tensor_info()->total_size() == 0)
+ {
+ auto_init_if_empty(*(node_dst->get_tensor_info()), *node_src);
+ }
+
+ // Check execution space
+ auto dst_info = node_dst->get_tensor_info();
+ check_execution_shape(graph, *dst_info);
+
+ ITensorDescPack<OpTensorContent> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[src.id()].get());
+ tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
+ graph.impl()->add_node<FloorContent>(id.second, desc, tensors);
check_multiple_roots(graph);
return op_node;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
index a9ccf908f0..f10e97e3e9 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
@@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor
Conv2dDescriptor conv2d{};
};
-struct ClEltwiseAddKernelDescriptor
+struct ClElementwiseKernelDescriptor
{
- friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1)
+ friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1)
{
- return desc0.add == desc1.add;
+ return desc0.eltwise == desc1.eltwise;
}
- AddDescriptor add{};
+ ElementwiseDescriptor eltwise{};
};
+
+struct ClFloorKernelDescriptor
+{
+ friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1)
+ {
+ return desc0.floor == desc1.floor;
+ }
+ FloorDescriptor floor{};
+};
+
struct ClActivationKernelDescriptor
{
friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &)
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
index de58ce70ed..cab51a2ce6 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
@@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
-Status ClAddKernel::generate(ClKernelBlueprint &bp) const
+Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const
ArgumentID dst_id;
add_tensor(bp, dst->desc, dst_id, dst->id);
- add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id);
+ add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
return Status{};
}
-Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
+Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
{
// 1. Check validity
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
@@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con
return Status{};
}
-bool ClAddKernel::operator==(const ClKernel &other) const
+bool ClElementwiseKernel::operator==(const ClKernel &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
+ return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
+}
+
+Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ArgumentID src_id;
+ add_tensor(bp, src->desc, src_id, src->id);
+ ArgumentID dst_id;
+ add_tensor(bp, dst->desc, dst_id, dst->id);
+
+ add_kcomp_floor(bp, desc, src_id, dst_id);
+ return Status{};
+}
+
+Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ // 1. Check validity
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+
+ // dst shape is correct
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
+
+ // 2. Check support level
+
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
+
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+bool ClFloorKernel::operator==(const ClKernel &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
@@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
}
return kernels;
}
+
std::vector<ClKernel *> traverse(ClKernelGraph &graph)
{
std::vector<ClKernel *> kernels;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
index 54e01ea850..c3580cfaca 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
@@ -139,16 +139,16 @@ public:
ClDirectConv2dKernelDescriptor desc{};
};
-struct ClAddKernel : public ClKernel
+struct ClElementwiseKernel : public ClKernel
{
public:
Complexity complexity() const override
{
return Complexity::Simple;
}
- ClAddKernel() = default;
- ~ClAddKernel() override = default;
- ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ ClElementwiseKernel() = default;
+ ~ClElementwiseKernel() override = default;
+ ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
: ClKernel{ graph, id, config, tensors }, desc{ desc }
{
}
@@ -156,7 +156,27 @@ public:
bool operator==(const ClKernel &other) const override;
Status generate(ClKernelBlueprint &bp) const override;
- ClEltwiseAddKernelDescriptor desc{};
+ ClElementwiseKernelDescriptor desc{};
+};
+
+struct ClFloorKernel : public ClKernel
+{
+public:
+ Complexity complexity() const override
+ {
+ return Complexity::Simple;
+ }
+ ClFloorKernel() = default;
+ ~ClFloorKernel() override = default;
+ ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ : ClKernel{ graph, id, config, tensors }, desc{ desc }
+ {
+ }
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+ bool operator==(const ClKernel &other) const override;
+ Status generate(ClKernelBlueprint &bp) const override;
+
+ ClFloorKernelDescriptor desc{};
};
struct ClKernelGraph
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
index f971196729..274a2517bb 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
@@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1
return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation);
}
-bool operator==(const AddDescriptor &, const AddDescriptor &)
+bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
{
- return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same
+ return ed0.op == ed1.op; // Compare Arithmatic Operations of two ElementwiseDescriptor objects
+}
+
+bool operator==(const FloorDescriptor &, const FloorDescriptor &)
+{
+ return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same
}
bool Conv2dContent::operator==(const OperatorContent &other) const
@@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const
return desc == converted.desc;
}
-bool AddContent::operator==(const OperatorContent &other) const
+bool ElementwiseContent::operator==(const OperatorContent &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
+ return desc == converted.desc;
+}
+
+bool FloorContent::operator==(const OperatorContent &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other);
return desc == converted.desc;
}
@@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
return Status{};
}
-Status AddContent::translate(ClKernelGraph &kernel_graph) const
+Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const
DependencyGraph::Id add_id;
ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
- st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc);
+ st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc);
ARM_COMPUTE_RETURN_ON_ERROR(st);
- st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id);
+ st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id);
ARM_COMPUTE_RETURN_ON_ERROR(st);
ARM_COMPUTE_UNUSED(add_id);
return Status{};
}
+Status FloorContent::translate(ClKernelGraph &kernel_graph) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+
+ ITensorDescPack<ClKernelTensor> tensors;
+
+ DependencyGraph::Id src_id;
+ auto st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
+
+ DependencyGraph::Id dst_id;
+ st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
+
+ DependencyGraph::Id add_id;
+ ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
+
+ st = ClFloorKernel::validate(src->desc, dst->desc);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ return Status{};
+}
+
std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
{
std::vector<const OperatorContent *> ops;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
index 2786d610e1..b303cdb9fc 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
@@ -157,19 +157,19 @@ private:
Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
};
-class AddContent : public OperatorContent
+class ElementwiseContent : public OperatorContent
{
public:
- AddContent() = default;
- AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ ElementwiseContent() = default;
+ ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
: OperatorContent(graph, id, tensors), desc(desc)
{
}
- ~AddContent() = default;
- AddContent(const AddContent &) = default;
- AddContent &operator=(const AddContent &) = default;
- AddContent(AddContent &&) = default;
- AddContent &operator=(AddContent &&) = default;
+ ~ElementwiseContent() = default;
+ ElementwiseContent(const ElementwiseContent &) = default;
+ ElementwiseContent &operator=(const ElementwiseContent &) = default;
+ ElementwiseContent(ElementwiseContent &&) = default;
+ ElementwiseContent &operator=(ElementwiseContent &&) = default;
bool operator==(const OperatorContent &other) const override;
OperatorComplexity complexity() const override
{
@@ -178,7 +178,31 @@ public:
Status translate(ClKernelGraph &kernel_graph) const override;
private:
- AddDescriptor desc{};
+ ElementwiseDescriptor desc{};
+};
+
+class FloorContent : public OperatorContent
+{
+public:
+ FloorContent() = default;
+ FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ : OperatorContent(graph, id, tensors), desc(desc)
+ {
+ }
+ ~FloorContent() = default;
+ FloorContent(const FloorContent &) = default;
+ FloorContent &operator=(const FloorContent &) = default;
+ FloorContent(FloorContent &&) = default;
+ FloorContent &operator=(FloorContent &&) = default;
+ bool operator==(const OperatorContent &other) const override;
+ OperatorComplexity complexity() const override
+ {
+ return OperatorComplexity::Simple;
+ }
+ Status translate(ClKernelGraph &kernel_graph) const override;
+
+private:
+ FloorDescriptor desc{};
};
struct OperatorGraph::Implementation