aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/WorkloadImpl
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2022-06-15 19:02:28 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2022-06-27 14:05:05 +0000
commitb1fcefddf3f59219a9d7930d607175b7e6c39347 (patch)
tree34e95efded15194b3c8abe4ba3da308c3259301d /src/core/experimental/dynamic_fusion/WorkloadImpl
parent41eb2d92c89274200d59ff97653e2bd66819b310 (diff)
downloadComputeLibrary-b1fcefddf3f59219a9d7930d607175b7e6c39347.tar.gz
Implement new Elementwise Dynamic Fusion Operators: Div, Floor
Resolves: COMPMID-5355 Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion/WorkloadImpl')
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h18
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp63
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h30
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp55
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h42
5 files changed, 178 insertions, 30 deletions
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
index a9ccf908f0..f10e97e3e9 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
@@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor
Conv2dDescriptor conv2d{};
};
-struct ClEltwiseAddKernelDescriptor
+struct ClElementwiseKernelDescriptor
{
- friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1)
+ friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1)
{
- return desc0.add == desc1.add;
+ return desc0.eltwise == desc1.eltwise;
}
- AddDescriptor add{};
+ ElementwiseDescriptor eltwise{};
};
+
+struct ClFloorKernelDescriptor
+{
+ friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1)
+ {
+ return desc0.floor == desc1.floor;
+ }
+ FloorDescriptor floor{};
+};
+
struct ClActivationKernelDescriptor
{
friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &)
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
index de58ce70ed..cab51a2ce6 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
@@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
-Status ClAddKernel::generate(ClKernelBlueprint &bp) const
+Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const
ArgumentID dst_id;
add_tensor(bp, dst->desc, dst_id, dst->id);
- add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id);
+ add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
return Status{};
}
-Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
+Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
{
// 1. Check validity
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
@@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con
return Status{};
}
-bool ClAddKernel::operator==(const ClKernel &other) const
+bool ClElementwiseKernel::operator==(const ClKernel &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
+ return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
+}
+
+Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ArgumentID src_id;
+ add_tensor(bp, src->desc, src_id, src->id);
+ ArgumentID dst_id;
+ add_tensor(bp, dst->desc, dst_id, dst->id);
+
+ add_kcomp_floor(bp, desc, src_id, dst_id);
+ return Status{};
+}
+
+Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ // 1. Check validity
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+
+ // dst shape is correct
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
+
+ // 2. Check support level
+
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
+
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+bool ClFloorKernel::operator==(const ClKernel &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
}
@@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
}
return kernels;
}
+
std::vector<ClKernel *> traverse(ClKernelGraph &graph)
{
std::vector<ClKernel *> kernels;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
index 54e01ea850..c3580cfaca 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
@@ -139,16 +139,16 @@ public:
ClDirectConv2dKernelDescriptor desc{};
};
-struct ClAddKernel : public ClKernel
+struct ClElementwiseKernel : public ClKernel
{
public:
Complexity complexity() const override
{
return Complexity::Simple;
}
- ClAddKernel() = default;
- ~ClAddKernel() override = default;
- ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ ClElementwiseKernel() = default;
+ ~ClElementwiseKernel() override = default;
+ ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
: ClKernel{ graph, id, config, tensors }, desc{ desc }
{
}
@@ -156,7 +156,27 @@ public:
bool operator==(const ClKernel &other) const override;
Status generate(ClKernelBlueprint &bp) const override;
- ClEltwiseAddKernelDescriptor desc{};
+ ClElementwiseKernelDescriptor desc{};
+};
+
+struct ClFloorKernel : public ClKernel
+{
+public:
+ Complexity complexity() const override
+ {
+ return Complexity::Simple;
+ }
+ ClFloorKernel() = default;
+ ~ClFloorKernel() override = default;
+ ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
+ : ClKernel{ graph, id, config, tensors }, desc{ desc }
+ {
+ }
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+ bool operator==(const ClKernel &other) const override;
+ Status generate(ClKernelBlueprint &bp) const override;
+
+ ClFloorKernelDescriptor desc{};
};
struct ClKernelGraph
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
index f971196729..274a2517bb 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
@@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1
return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation);
}
-bool operator==(const AddDescriptor &, const AddDescriptor &)
+bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
{
- return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same
+ return ed0.op == ed1.op; // Compare arithmetic operations of two ElementwiseDescriptor objects
+}
+
+bool operator==(const FloorDescriptor &, const FloorDescriptor &)
+{
+ return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same
}
bool Conv2dContent::operator==(const OperatorContent &other) const
@@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const
return desc == converted.desc;
}
-bool AddContent::operator==(const OperatorContent &other) const
+bool ElementwiseContent::operator==(const OperatorContent &other) const
+{
+ const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
+ return desc == converted.desc;
+}
+
+bool FloorContent::operator==(const OperatorContent &other) const
{
- const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other);
+ const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other);
return desc == converted.desc;
}
@@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
return Status{};
}
-Status AddContent::translate(ClKernelGraph &kernel_graph) const
+Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
{
const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const
DependencyGraph::Id add_id;
ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
- st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc);
+ st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc);
ARM_COMPUTE_RETURN_ON_ERROR(st);
- st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id);
+ st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id);
ARM_COMPUTE_RETURN_ON_ERROR(st);
ARM_COMPUTE_UNUSED(add_id);
return Status{};
}
+Status FloorContent::translate(ClKernelGraph &kernel_graph) const
+{
+ const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+
+ ITensorDescPack<ClKernelTensor> tensors;
+
+ DependencyGraph::Id src_id;
+ auto st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
+
+ DependencyGraph::Id dst_id;
+ st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+ tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
+
+ DependencyGraph::Id add_id;
+ ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
+
+ st = ClFloorKernel::validate(src->desc, dst->desc);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id);
+ ARM_COMPUTE_RETURN_ON_ERROR(st);
+
+ return Status{};
+}
+
std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
{
std::vector<const OperatorContent *> ops;
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
index 2786d610e1..b303cdb9fc 100644
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
@@ -157,19 +157,19 @@ private:
Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
};
-class AddContent : public OperatorContent
+class ElementwiseContent : public OperatorContent
{
public:
- AddContent() = default;
- AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ ElementwiseContent() = default;
+ ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
: OperatorContent(graph, id, tensors), desc(desc)
{
}
- ~AddContent() = default;
- AddContent(const AddContent &) = default;
- AddContent &operator=(const AddContent &) = default;
- AddContent(AddContent &&) = default;
- AddContent &operator=(AddContent &&) = default;
+ ~ElementwiseContent() = default;
+ ElementwiseContent(const ElementwiseContent &) = default;
+ ElementwiseContent &operator=(const ElementwiseContent &) = default;
+ ElementwiseContent(ElementwiseContent &&) = default;
+ ElementwiseContent &operator=(ElementwiseContent &&) = default;
bool operator==(const OperatorContent &other) const override;
OperatorComplexity complexity() const override
{
@@ -178,7 +178,31 @@ public:
Status translate(ClKernelGraph &kernel_graph) const override;
private:
- AddDescriptor desc{};
+ ElementwiseDescriptor desc{};
+};
+
+class FloorContent : public OperatorContent
+{
+public:
+ FloorContent() = default;
+ FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+ : OperatorContent(graph, id, tensors), desc(desc)
+ {
+ }
+ ~FloorContent() = default;
+ FloorContent(const FloorContent &) = default;
+ FloorContent &operator=(const FloorContent &) = default;
+ FloorContent(FloorContent &&) = default;
+ FloorContent &operator=(FloorContent &&) = default;
+ bool operator==(const OperatorContent &other) const override;
+ OperatorComplexity complexity() const override
+ {
+ return OperatorComplexity::Simple;
+ }
+ Status translate(ClKernelGraph &kernel_graph) const override;
+
+private:
+ FloorDescriptor desc{};
};
struct OperatorGraph::Implementation