diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2022-06-15 19:02:28 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2022-06-27 14:05:05 +0000 |
commit | b1fcefddf3f59219a9d7930d607175b7e6c39347 (patch) | |
tree | 34e95efded15194b3c8abe4ba3da308c3259301d /src/core/experimental/dynamic_fusion/WorkloadImpl | |
parent | 41eb2d92c89274200d59ff97653e2bd66819b310 (diff) | |
download | ComputeLibrary-b1fcefddf3f59219a9d7930d607175b7e6c39347.tar.gz |
Implement new Elementwise Dynamic Fusion Operators: Div, Floor
Resolves: COMPMID-5355
Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion/WorkloadImpl')
5 files changed, 178 insertions, 30 deletions
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h index a9ccf908f0..f10e97e3e9 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h @@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor Conv2dDescriptor conv2d{}; }; -struct ClEltwiseAddKernelDescriptor +struct ClElementwiseKernelDescriptor { - friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1) + friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1) { - return desc0.add == desc1.add; + return desc0.eltwise == desc1.eltwise; } - AddDescriptor add{}; + ElementwiseDescriptor eltwise{}; }; + +struct ClFloorKernelDescriptor +{ + friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1) + { + return desc0.floor == desc1.floor; + } + FloorDescriptor floor{}; +}; + struct ClActivationKernelDescriptor { friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &) diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp index de58ce70ed..cab51a2ce6 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp @@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; } -Status ClAddKernel::generate(ClKernelBlueprint &bp) const +Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const { const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0); const auto rhs = 
_tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const ArgumentID dst_id; add_tensor(bp, dst->desc, dst_id, dst->id); - add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id); + add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id); return Status{}; } -Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst) +Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst) { // 1. Check validity ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); @@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con return Status{}; } -bool ClAddKernel::operator==(const ClKernel &other) const +bool ClElementwiseKernel::operator==(const ClKernel &other) const { - const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other); + const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other); + return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; +} + +Status ClFloorKernel::generate(ClKernelBlueprint &bp) const +{ + const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ArgumentID src_id; + add_tensor(bp, src->desc, src_id, src->id); + ArgumentID dst_id; + add_tensor(bp, dst->desc, dst_id, dst->id); + + add_kcomp_floor(bp, desc, src_id, dst_id); + return Status{}; +} + +Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst) +{ + // 1. 
Check validity + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + + // Matching data type + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + + // Matching data layout + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); + + // All tensor infos are initialized + ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); + ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); + + // Device requirements are met + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + + // dst shape is correct + ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst"); + + // 2. Check support level + + // Data type + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16); + + // Data layout + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); + + return Status{}; +} + +bool ClFloorKernel::operator==(const ClKernel &other) const +{ + const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other); return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; } @@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph) } return kernels; } + std::vector<ClKernel *> traverse(ClKernelGraph &graph) { std::vector<ClKernel *> kernels; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h index 54e01ea850..c3580cfaca 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h @@ -139,16 +139,16 @@ public: ClDirectConv2dKernelDescriptor desc{}; }; -struct ClAddKernel : public ClKernel +struct ClElementwiseKernel : public ClKernel { public: Complexity complexity() const override { return Complexity::Simple; } - ClAddKernel() = default; - ~ClAddKernel() 
override = default; - ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) + ClElementwiseKernel() = default; + ~ClElementwiseKernel() override = default; + ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) : ClKernel{ graph, id, config, tensors }, desc{ desc } { } @@ -156,7 +156,27 @@ public: bool operator==(const ClKernel &other) const override; Status generate(ClKernelBlueprint &bp) const override; - ClEltwiseAddKernelDescriptor desc{}; + ClElementwiseKernelDescriptor desc{}; +}; + +struct ClFloorKernel : public ClKernel +{ +public: + Complexity complexity() const override + { + return Complexity::Simple; + } + ClFloorKernel() = default; + ~ClFloorKernel() override = default; + ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) + : ClKernel{ graph, id, config, tensors }, desc{ desc } + { + } + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + bool operator==(const ClKernel &other) const override; + Status generate(ClKernelBlueprint &bp) const override; + + ClFloorKernelDescriptor desc{}; }; struct ClKernelGraph diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp index f971196729..274a2517bb 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp @@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1 return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation); } 
-bool operator==(const AddDescriptor &, const AddDescriptor &) +bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1) { - return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same + return ed0.op == ed1.op; // Compare Arithmetic Operations of two ElementwiseDescriptor objects +} + +bool operator==(const FloorDescriptor &, const FloorDescriptor &) +{ + return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same } bool Conv2dContent::operator==(const OperatorContent &other) const @@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const return desc == converted.desc; } -bool AddContent::operator==(const OperatorContent &other) const +bool ElementwiseContent::operator==(const OperatorContent &other) const +{ + const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other); + return desc == converted.desc; +} + +bool FloorContent::operator==(const OperatorContent &other) const { - const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other); + const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other); return desc == converted.desc; } @@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const return Status{}; } -Status AddContent::translate(ClKernelGraph &kernel_graph) const +Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const { const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0); const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const DependencyGraph::Id add_id; ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect }; - st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc); + st = 
ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc); ARM_COMPUTE_RETURN_ON_ERROR(st); - st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id); + st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id); ARM_COMPUTE_RETURN_ON_ERROR(st); ARM_COMPUTE_UNUSED(add_id); return Status{}; } +Status FloorContent::translate(ClKernelGraph &kernel_graph) const +{ + const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + + ITensorDescPack<ClKernelTensor> tensors; + + DependencyGraph::Id src_id; + auto st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id)); + + DependencyGraph::Id dst_id; + st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id)); + + DependencyGraph::Id add_id; + ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect }; + + st = ClFloorKernel::validate(src->desc, dst->desc); + ARM_COMPUTE_RETURN_ON_ERROR(st); + + st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + + return Status{}; +} + std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph) { std::vector<const OperatorContent *> ops; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h index 2786d610e1..b303cdb9fc 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h +++ 
b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h @@ -157,19 +157,19 @@ private: Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const; }; -class AddContent : public OperatorContent +class ElementwiseContent : public OperatorContent { public: - AddContent() = default; - AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) + ElementwiseContent() = default; + ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) : OperatorContent(graph, id, tensors), desc(desc) { } - ~AddContent() = default; - AddContent(const AddContent &) = default; - AddContent &operator=(const AddContent &) = default; - AddContent(AddContent &&) = default; - AddContent &operator=(AddContent &&) = default; + ~ElementwiseContent() = default; + ElementwiseContent(const ElementwiseContent &) = default; + ElementwiseContent &operator=(const ElementwiseContent &) = default; + ElementwiseContent(ElementwiseContent &&) = default; + ElementwiseContent &operator=(ElementwiseContent &&) = default; bool operator==(const OperatorContent &other) const override; OperatorComplexity complexity() const override { @@ -178,7 +178,31 @@ public: Status translate(ClKernelGraph &kernel_graph) const override; private: - AddDescriptor desc{}; + ElementwiseDescriptor desc{}; +}; + +class FloorContent : public OperatorContent +{ +public: + FloorContent() = default; + FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) + : OperatorContent(graph, id, tensors), desc(desc) + { + } + ~FloorContent() = default; + FloorContent(const FloorContent &) = default; + FloorContent &operator=(const FloorContent &) = default; + FloorContent(FloorContent &&) = default; + FloorContent &operator=(FloorContent &&) = default; + bool 
operator==(const OperatorContent &other) const override; + OperatorComplexity complexity() const override + { + return OperatorComplexity::Simple; + } + Status translate(ClKernelGraph &kernel_graph) const override; + +private: + FloorDescriptor desc{}; }; struct OperatorGraph::Implementation |