From 1d359279e22874121def2ce4bfdb633d94ea5ade Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Thu, 10 Jun 2021 13:56:11 +0100 Subject: Add in-place computation for elementwise operations - Add in-place computation for elementwise operations at graph level - Modify support case to test in-place computation for elementwise operations Resolves: COMPMID-4414 Signed-off-by: Sheri Zhang Change-Id: I5a4de1235dd29a31160e770a16d62f4b98c84ae6 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5803 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: SiCong Li Tested-by: Arm Jenkins --- arm_compute/graph/Types.h | 1 + src/graph/mutators/InPlaceOperationMutator.cpp | 93 ++++++++++++++++++---- .../fixtures/ElementwiseOperationsFixture.h | 84 ++++++++++++------- 3 files changed, 132 insertions(+), 46 deletions(-) diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 7306b82a1e..63a9433fe6 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -112,6 +112,7 @@ enum class EltwiseOperation Mul, /**< Arithmetic multiplication */ Max, /**< Arithmetic maximum */ Div, /**< Arithmetic division */ + Min, /**< Arithmetic minimum */ }; /** Supported Unary Element-wise operations */ diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp index 61639a8f6f..616ec5c73d 100644 --- a/src/graph/mutators/InPlaceOperationMutator.cpp +++ b/src/graph/mutators/InPlaceOperationMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/graph/mutators/InPlaceOperationMutator.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" @@ -69,6 +70,64 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge) return edge->tensor() != input_tensor; }); } + +// If do in-place calculation, then need to use the new output and inherit original output's accessor +void set_new_output_and_inherit_accessor(std::unique_ptr &node, Tensor *orig_output, Tensor *new_output) +{ + ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : " + << node->id() << " and name : " << node->name() << std::endl); + // Update accessor + new_output->set_accessor(orig_output->extract_accessor()); + // Update output + node->set_output_tensor(new_output->id(), 0); +} + +// Try to mutate the node to perform the elementwise in-place calculation +void try_in_place_elementwise(std::unique_ptr &node) +{ + // Get input edge + Edge *input0_edge = node->input_edge(0); + Edge *input1_edge = node->input_edge(1); + ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr); + + auto input0_tensor = input0_edge->tensor(); + auto input1_tensor = input1_edge->tensor(); + ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr); + + const auto shape0 = input0_tensor->desc().shape; + const auto shape1 = input1_tensor->desc().shape; + const auto qinfo0 = input0_tensor->desc().quant_info; + const auto qinfo1 = input1_tensor->desc().quant_info; + + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + // Inputs are not broadcast compatible + if(out_shape.total_size() == 0) + { + return; + } + + // Get current output tensor + auto current_output_tensor = node->output(0); + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr); + const auto qinfo_out = current_output_tensor->desc().quant_info; + + // Can do in place, if 
the input has same shape as output, has same quantisation info as output, and input doesn't have accessor. + bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (input0_tensor->accessor() == nullptr); + bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (input1_tensor->accessor() == nullptr); + + if(input0_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor); + } + else if(input1_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor); + } + else + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + } +} } // namespace const char *InPlaceOperationMutator::name() @@ -103,25 +162,27 @@ void InPlaceOperationMutator::mutate(Graph &g) // Check if parent has a single output if yes then force in place calculation else not if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge)) { - // Get current and new output tensors - auto current_output_tensor = node->output(0); - auto new_output_tensor = input_edge->tensor(); - - ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); - - // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different - if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + if(node->type() == NodeType::EltwiseLayer) { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + try_in_place_elementwise(node); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : " - << node->id() << " and 
name : " << node->name() << std::endl); - // Update accessor - new_output_tensor->set_accessor(current_output_tensor->extract_accessor()); - // Update output - node->set_output_tensor(new_output_tensor->id(), 0); + // Get current and new output tensors + auto current_output_tensor = node->output(0); + auto new_output_tensor = input_edge->tensor(); + + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); + + // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different + if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + } + else + { + set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor); + } } } } diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index 352720c03b..6661862342 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -48,12 +49,12 @@ public: template void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool use_dyanmic_shape = false) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false, bool use_dynamic_shape = false) { _op = op; - _use_dynamic_shape = use_dyanmic_shape; + 
_use_dynamic_shape = use_dynamic_shape; - _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); + _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out, in_place); _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); } @@ -82,12 +83,30 @@ protected: } TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false) { // Create tensors - TensorType ref_src1 = create_tensor(shape0, data_type0, 1, qinfo0); - TensorType ref_src2 = create_tensor(shape1, data_type1, 1, qinfo1); - TensorType dst = create_tensor(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor(shape0, data_type0, 1, qinfo0); + TensorType ref_src2 = create_tensor(shape1, data_type1, 1, qinfo1); + TensorType dst = create_tensor(out_shape, output_data_type, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + bool src1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out); + bool src2_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out); + bool do_in_place = in_place && out_shape.total_size() != 0 && (src1_can_in_place || src2_can_in_place); + if(do_in_place) + { + if(src1_can_in_place) + { + actual_dst = &ref_src1; + } + else if(src2_can_in_place) + { + actual_dst = &ref_src2; + } + } // if _use_dynamic_shape is true, this fixture will 
test scenario for dynamic shapes. // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() @@ -101,7 +120,7 @@ protected: // Create and configure function FunctionType elem_op; - elem_op.configure(&ref_src1, &ref_src2, &dst); + elem_op.configure(&ref_src1, &ref_src2, actual_dst); if(_use_dynamic_shape) { @@ -111,16 +130,21 @@ protected: ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); - ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); - dst.allocator()->allocate(); + + // If in-place computation is not supported, still need to allocate original dst + if(!do_in_place) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -129,7 +153,7 @@ protected: // Compute function elem_op.run(); - return dst; + return std::move(*actual_dst); } SimpleTensor compute_reference(const TensorShape &shape0, const TensorShape &shape1, @@ -162,11 +186,11 @@ public: template void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place = false) { ArithmeticOperationsGenericFixture::setup(op, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, in_place); _act_info = act_info; } @@ -227,7 +251,7 @@ public: { 
ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -253,7 +277,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true, true); } }; @@ -279,7 +303,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -333,7 +357,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -359,7 +383,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -402,7 +426,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -415,7 +439,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), 
QuantizationInfo(), true); } }; @@ -441,7 +465,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -484,7 +508,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -497,7 +521,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -523,7 +547,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -566,7 +590,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -579,7 +603,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::PRELU, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -622,7 +646,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::PRELU, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -635,7 +659,7 @@ public: { 
ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -661,7 +685,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; -- cgit v1.2.1