diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2022-06-15 19:02:28 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2022-06-27 14:05:05 +0000 |
commit | b1fcefddf3f59219a9d7930d607175b7e6c39347 (patch) | |
tree | 34e95efded15194b3c8abe4ba3da308c3259301d /src/core/experimental/dynamic_fusion | |
parent | 41eb2d92c89274200d59ff97653e2bd66819b310 (diff) | |
download | ComputeLibrary-b1fcefddf3f59219a9d7930d607175b7e6c39347.tar.gz |
Implement new Elementwise Dynamic Fusion Operators: Div, Floor
Resolves: COMPMID-5355
Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion')
14 files changed, 476 insertions, 63 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp index 0be2ba02b5..9b6daae619 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp @@ -55,18 +55,32 @@ Status add_tensor(ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info, return Status{}; } -Status add_kcomp_eltwise_add(ClKernelBlueprint &kernel_blueprint, const ClEltwiseAddKernelDescriptor &, - ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id) +Status add_kcomp_eltwise_op(ClKernelBlueprint &kernel_blueprint, const ClElementwiseKernelDescriptor &desc, + ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id) { kernel_blueprint.impl().add_component( - std::make_unique<ClElementwiseAddKernelComponent>( + std::make_unique<ClElementwiseKernelComponent>( &kernel_blueprint, + desc, SharedVarLink{ src0_id, SharedVarIO::Input }, SharedVarLink{ src1_id, SharedVarIO::Input }, SharedVarLink{ dst_id, SharedVarIO::Output })); return Status{}; } + +Status add_kcomp_floor(ClKernelBlueprint &kernel_blueprint, const ClFloorKernelDescriptor &, + ArgumentID src_id, ArgumentID &dst_id) +{ + kernel_blueprint.impl().add_component( + std::make_unique<ClFloorKernelComponent>( + &kernel_blueprint, + SharedVarLink{ src_id, SharedVarIO::Input }, + SharedVarLink{ dst_id, SharedVarIO::Output })); + + return Status{}; +} + Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID, ArgumentID &) { return Status{}; diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h index 067e9737e3..463fc5e7cf 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h @@ -59,9 +59,13 @@ public: }; ///// Kernel Components ///// -/** Component: Eltwise Add */ -Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id, - ArgumentID src1_id, ArgumentID &dst_id); +/** Component: Eltwise Operator */ +Status add_kcomp_eltwise_op(ClKernelBlueprint &, const ClElementwiseKernelDescriptor &, ArgumentID src0_id, + ArgumentID src1_id, ArgumentID &dst_id); + +/** Component: Floor */ +Status add_kcomp_floor(ClKernelBlueprint &, const ClFloorKernelDescriptor &, ArgumentID src_id, + ArgumentID &dst_id); /** Component: Activation */ Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id); diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp index b63e2167b7..811cd79811 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp @@ -237,7 +237,7 @@ std::string ClDirectConvolutionKernelComponent::get_component_code() const T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0); // c = c + bias[broadcasted] - T_ADD_BROADCAST_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); + T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); )_"; } diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp index 965a68f51d..24a9eee9a3 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp @@ -23,7 +23,7 @@ */ #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h" +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" #include "arm_compute/core/Validate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -34,17 +34,17 @@ namespace experimental { namespace dynamic_fusion { -ComponentType ClElementwiseAddKernelComponent::get_component_type() const +ComponentType ClElementwiseKernelComponent::get_component_type() const { return ComponentType::Simple; } -std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const +std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const { return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; } -Window ClElementwiseAddKernelComponent::get_window() const +Window ClElementwiseKernelComponent::get_window() const { const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id); const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); @@ -64,7 +64,7 @@ Window ClElementwiseAddKernelComponent::get_window() const return win; } -std::string ClElementwiseAddKernelComponent::get_component_code() const +std::string ClElementwiseKernelComponent::get_component_code() const { std::string code; const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; @@ -72,7 +72,7 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const if(is_root) { return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- + //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- // IN_0(LHS) {{lhs}} // IN_1(RHS) {{rhs}} // OUT(dst, accum) {{dst}} @@ -87,19 +87,19 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile); #if defined(IS_BROADCAST) - T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); + T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); #else // !defined(IS_BROADCAST) - T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); + T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); #endif // defined(IS_BROADCAST) } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- + //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- )_"; } else { return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- + //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- // IN_0/Out(Accumulator) {{acc}} // IN_1(Addend) {{addend}} @@ -110,17 +110,17 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile); #if defined(IS_BROADCAST) - T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); + T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); #else // !defined(IS_BROADCAST) - T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); + T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); #endif // defined(IS_BROADCAST) } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- + //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- )_"; } } -CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const +CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const { const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); @@ -137,7 +137,7 @@ CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const return build_opts; } -std::string ClElementwiseAddKernelComponent::generate_config_id() const +std::string ClElementwiseKernelComponent::generate_config_id() const { auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); std::string config_id{}; @@ -151,7 +151,7 @@ std::string ClElementwiseAddKernelComponent::generate_config_id() const return config_id; } -void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const +void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const { const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs"); @@ -162,7 +162,7 @@ void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtabl } } -ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag_lut(const SharedVarTable &vtable) const +ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const { TagLUT lut{}; const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); @@ -199,6 +199,18 @@ ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag // Local build options lut["meta_kernel_id"] = id(); lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); + + switch(_desc.eltwise.op) + { + case ArithmeticOperation::DIV: + lut["ELTWISE_OP"] = "DIV"; + break; + case ArithmeticOperation::ADD: + lut["ELTWISE_OP"] = "ADD"; + break; + default: + ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); + } return lut; } } // namespace dynamic_fusion diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h index 5f8b1569ac..91b14ffafa 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h @@ -34,11 +34,11 @@ namespace experimental { namespace dynamic_fusion { -class ClElementwiseAddKernelComponent : public IClKernelComponent +class ClElementwiseKernelComponent : public IClKernelComponent { public: - ClElementwiseAddKernelComponent(ClKernelBlueprint *blueprint, const Link &lhs, const Link &rhs, const Link &dst) - : IClKernelComponent(blueprint), _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } + ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst) + : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } { } @@ -63,9 +63,10 @@ public: } private: - Link _lhs{}; - Link _rhs{}; - Link _dst{}; + ClElementwiseKernelDescriptor _desc{}; + Link _lhs{}; + Link _rhs{}; + Link _dst{}; }; } // namespace dynamic_fusion diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp new file mode 100644 index 0000000000..87cc110561 --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" +#include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +ComponentType ClFloorKernelComponent::get_component_type() const +{ + return ComponentType::Simple; +} + +std::set<std::string> ClFloorKernelComponent::get_headers_list() const +{ + return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; +} + +Window ClFloorKernelComponent::get_window() const +{ + const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); + ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + + ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info); + auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type()); + + const unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); + Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration)); + + return win; +} + +std::string ClFloorKernelComponent::get_component_code() const +{ + return R"_( + //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- + // IN_0(src) {{src}} + // OUT(dst, accum) {{dst}} + TILE({{DATA_TYPE}}, M0, N0, {{dst}}); + { + TILE({{DATA_TYPE}}, M0, N0, src_tile); + + T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile); + T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}}); + } + + //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- +)_"; +} + +CLBuildOptions ClFloorKernelComponent::generate_build_options() const +{ + CLBuildOptions build_opts{}; + + const auto n0 = _blueprint->impl().get_execution_window().x().step(); + const auto m0 = _blueprint->impl().get_execution_window().y().step(); + + const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; + build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); + build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); + build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); + + return build_opts; +} + +std::string ClFloorKernelComponent::generate_config_id() const +{ + auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + std::string config_id{}; + config_id += lower_string(string_from_data_type(t_dst_info->data_type())); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(0)); + config_id += "_"; + config_id += support::cpp11::to_string(t_dst_info->dimension(1)); + config_id += "_"; + config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); + return config_id; +} + +void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const +{ + vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); + vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); +} + +ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const +{ + TagLUT lut{}; + const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); + // Arguments and global shared variables + lut["src"] = vtable.get(_src); + lut["dst"] = vtable.get(_dst); + lut["meta_kernel_id"] = id(); + lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); + return lut; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h new file mode 100644 index 0000000000..5463e233d4 --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION + +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H + +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class ClFloorKernelComponent : public IClKernelComponent +{ +public: + ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) + : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } + { + } + + ComponentType get_component_type() const override; + std::set<std::string> get_headers_list() const override; + std::string get_component_code() const override; + Window get_window() const override; + CLBuildOptions generate_build_options() const override; + std::string generate_config_id() const override; + + virtual std::vector<Link> get_links() const override + { + return { _src, _dst }; + } + + virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; + virtual void allocate_shared_vars(SharedVarTable &vtable) const override; + + virtual std::string name() const override + { + return "floor_" + std::to_string(id()); + } + +private: + Link _src{}; + Link _dst{}; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h index 26e50523a9..3f99dd5553 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h @@ -27,7 +27,8 @@ #define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H #include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h" +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" +#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" #include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" #endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H diff --git a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp index a335e5aada..bd88afdb47 100644 --- a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp +++ b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp @@ -198,7 +198,7 @@ void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMetho node->set_method(method); } -Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst) +Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst) { auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() }); check_dependency_graph_op_success(graph, id.first); @@ -224,7 +224,36 @@ Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get()); tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get()); tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get()); - graph.impl()->add_node<AddContent>(id.second, desc, tensors); + graph.impl()->add_node<ElementwiseContent>(id.second, desc, tensors); + check_multiple_roots(graph); + + return op_node; +} + +Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst) +{ + auto id = graph.impl()->graph.add_operator({ src.id() }, { dst.id() }); + check_dependency_graph_op_success(graph, id.first); + + Operator op_node(id.second); + + // Infer TensorInfo + auto node_src = graph.impl()->tensors[src.id()]->get_tensor_info(); + OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get(); + + if(node_dst->get_tensor_info()->total_size() == 0) + { + auto_init_if_empty(*(node_dst->get_tensor_info()), *node_src); + } + + // Check execution space + auto dst_info = node_dst->get_tensor_info(); + check_execution_shape(graph, *dst_info); + + ITensorDescPack<OpTensorContent> tensors; + tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[src.id()].get()); + tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get()); + graph.impl()->add_node<FloorContent>(id.second, desc, tensors); check_multiple_roots(graph); return op_node; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h index a9ccf908f0..f10e97e3e9 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h @@ -42,14 +42,24 @@ struct ClDirectConv2dKernelDescriptor Conv2dDescriptor conv2d{}; }; -struct ClEltwiseAddKernelDescriptor +struct ClElementwiseKernelDescriptor { - friend bool operator==(const ClEltwiseAddKernelDescriptor &desc0, const ClEltwiseAddKernelDescriptor &desc1) + friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1) { - return desc0.add == desc1.add; + return desc0.eltwise == desc1.eltwise; } - AddDescriptor add{}; + ElementwiseDescriptor eltwise{}; }; + +struct ClFloorKernelDescriptor +{ + friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1) + { + return desc0.floor == desc1.floor; + } + FloorDescriptor floor{}; +}; + struct ClActivationKernelDescriptor { friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &) diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp index de58ce70ed..cab51a2ce6 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp @@ -124,7 +124,7 @@ bool ClDirectConv2dKernel::operator==(const ClKernel &other) const return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; } -Status ClAddKernel::generate(ClKernelBlueprint &bp) const +Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const { const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0); const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -137,11 +137,11 @@ Status ClAddKernel::generate(ClKernelBlueprint &bp) const ArgumentID dst_id; add_tensor(bp, dst->desc, dst_id, dst->id); - add_kcomp_eltwise_add(bp, desc, lhs_id, rhs_id, dst_id); + add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id); return Status{}; } -Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst) +Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst) { // 1. Check validity ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); @@ -186,9 +186,61 @@ Status ClAddKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, con return Status{}; } -bool ClAddKernel::operator==(const ClKernel &other) const +bool ClElementwiseKernel::operator==(const ClKernel &other) const { - const auto converted = *utils::cast::polymorphic_downcast<const ClAddKernel *>(&other); + const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other); + return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; +} + +Status ClFloorKernel::generate(ClKernelBlueprint &bp) const +{ + const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ArgumentID src_id; + add_tensor(bp, src->desc, src_id, src->id); + ArgumentID dst_id; + add_tensor(bp, dst->desc, dst_id, dst->id); + + add_kcomp_floor(bp, desc, src_id, dst_id); + return Status{}; +} + +Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst) +{ + // 1. Check validity + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + + // Matching data type + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + + // Matching data layout + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); + + // All tensor infos are initialized + ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); + ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); + + // Device requirements are met + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + + // dst shape is correct + ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst"); + + // 2. Check support level + + // Data type + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16); + + // Data layout + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); + + return Status{}; +} + +bool ClFloorKernel::operator==(const ClKernel &other) const +{ + const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other); return config() == other.config() && tensors() == other.tensors() && desc == converted.desc; } @@ -202,6 +254,7 @@ std::vector<const ClKernel *> traverse(const ClKernelGraph &graph) } return kernels; } + std::vector<ClKernel *> traverse(ClKernelGraph &graph) { std::vector<ClKernel *> kernels; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h index 54e01ea850..c3580cfaca 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h @@ -139,16 +139,16 @@ public: ClDirectConv2dKernelDescriptor desc{}; }; -struct ClAddKernel : public ClKernel +struct ClElementwiseKernel : public ClKernel { public: Complexity complexity() const override { return Complexity::Simple; } - ClAddKernel() = default; - ~ClAddKernel() override = default; - ClAddKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClEltwiseAddKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) + ClElementwiseKernel() = default; + ~ClElementwiseKernel() override = default; + ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) : ClKernel{ graph, id, config, tensors }, desc{ desc } { } @@ -156,7 +156,27 @@ public: bool operator==(const ClKernel &other) const override; Status generate(ClKernelBlueprint &bp) const override; - ClEltwiseAddKernelDescriptor desc{}; + ClElementwiseKernelDescriptor desc{}; +}; + +struct ClFloorKernel : public ClKernel +{ +public: + Complexity complexity() const override + { + return Complexity::Simple; + } + ClFloorKernel() = default; + ~ClFloorKernel() override = default; + ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors) + : ClKernel{ graph, id, config, tensors }, desc{ desc } + { + } + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + bool operator==(const ClKernel &other) const override; + Status generate(ClKernelBlueprint &bp) const override; + + ClFloorKernelDescriptor desc{}; }; struct ClKernelGraph diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp index f971196729..274a2517bb 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp @@ -113,9 +113,14 @@ bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1 return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation); } -bool operator==(const AddDescriptor &, const AddDescriptor &) +bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1) { - return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same + return ed0.op == ed1.op; // Compare Arithmatic Operations of two ElementwiseDescriptor objects +} + +bool operator==(const FloorDescriptor &, const FloorDescriptor &) +{ + return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same } bool Conv2dContent::operator==(const OperatorContent &other) const @@ -124,9 +129,15 @@ bool Conv2dContent::operator==(const OperatorContent &other) const return desc == converted.desc; } -bool AddContent::operator==(const OperatorContent &other) const +bool ElementwiseContent::operator==(const OperatorContent &other) const +{ + const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other); + return desc == converted.desc; +} + +bool FloorContent::operator==(const OperatorContent &other) const { - const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other); + const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other); return desc == converted.desc; } @@ -311,7 +322,7 @@ Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const return Status{}; } -Status AddContent::translate(ClKernelGraph &kernel_graph) const +Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const { const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0); const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -338,16 +349,46 @@ Status AddContent::translate(ClKernelGraph &kernel_graph) const DependencyGraph::Id add_id; ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect }; - st = ClAddKernel::validate(lhs->desc, rhs->desc, dst->desc); + st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc); ARM_COMPUTE_RETURN_ON_ERROR(st); - st = kernel_graph.add_kernel<ClAddKernel>(config, ClEltwiseAddKernelDescriptor{ desc }, tensors, add_id); + st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id); ARM_COMPUTE_RETURN_ON_ERROR(st); ARM_COMPUTE_UNUSED(add_id); return Status{}; } +Status FloorContent::translate(ClKernelGraph &kernel_graph) const +{ + const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + + ITensorDescPack<ClKernelTensor> tensors; + + DependencyGraph::Id src_id; + auto st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id)); + + DependencyGraph::Id dst_id; + st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id)); + + DependencyGraph::Id add_id; + ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect }; + + st = ClFloorKernel::validate(src->desc, dst->desc); + ARM_COMPUTE_RETURN_ON_ERROR(st); + + st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id); + ARM_COMPUTE_RETURN_ON_ERROR(st); + + return Status{}; +} + std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph) { std::vector<const OperatorContent *> ops; diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h index 2786d610e1..b303cdb9fc 100644 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h +++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h @@ -157,19 +157,19 @@ private: Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const; }; -class AddContent : public OperatorContent +class ElementwiseContent : public OperatorContent { public: - AddContent() = default; - AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) + ElementwiseContent() = default; + ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) : OperatorContent(graph, id, tensors), desc(desc) { } - ~AddContent() = default; - AddContent(const AddContent &) = default; - AddContent &operator=(const AddContent &) = default; - AddContent(AddContent &&) = default; - AddContent &operator=(AddContent &&) = default; + ~ElementwiseContent() = default; + ElementwiseContent(const ElementwiseContent &) = default; + ElementwiseContent &operator=(const ElementwiseContent &) = default; + ElementwiseContent(ElementwiseContent &&) = default; + ElementwiseContent &operator=(ElementwiseContent &&) = default; bool operator==(const OperatorContent &other) const override; OperatorComplexity complexity() const override { @@ -178,7 +178,31 @@ public: Status translate(ClKernelGraph &kernel_graph) const override; private: - AddDescriptor desc{}; + ElementwiseDescriptor desc{}; +}; + +class FloorContent : public OperatorContent +{ +public: + FloorContent() = default; + FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors) + : OperatorContent(graph, id, tensors), desc(desc) + { + } + ~FloorContent() = default; + FloorContent(const FloorContent &) = default; + FloorContent &operator=(const FloorContent &) = default; + FloorContent(FloorContent &&) = default; + FloorContent &operator=(FloorContent &&) = default; + bool operator==(const OperatorContent &other) const override; + OperatorComplexity complexity() const override + { + return OperatorComplexity::Simple; + } + Status translate(ClKernelGraph &kernel_graph) const override; + +private: + FloorDescriptor desc{}; }; struct OperatorGraph::Implementation |