From a387e271b1e02ffd5c2993702b9a21c1ed5c95fa Mon Sep 17 00:00:00 2001
From: Sheri Zhang
Date: Tue, 29 Jun 2021 17:34:06 +0100
Subject: Add in-place calculation support for CL elementwise arithmetic kernels

- Add in-place calculation support in ClArithmeticKernel, ClSaturatedArithmeticKernel and ClMulKernel
- Add in-place test cases

Resolves: COMPMID-4431

Signed-off-by: Sheri Zhang
Change-Id: Id484bdb76b74478a33fedb471ae0c7f799c599f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5885
Comments-Addressed: Arm Jenkins
Reviewed-by: SiCong Li
Tested-by: Arm Jenkins
---
 .../fixtures/PixelWiseMultiplicationFixture.h | 44 ++++++++++++++--------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'tests/validation/fixtures/PixelWiseMultiplicationFixture.h')

diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
index 7c643bd726..8dc5179109 100644
--- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -76,9 +76,29 @@ protected:
                               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info)
     {
         // Create tensors
-        TensorType src1 = create_tensor(shape0, dt_in1, 1, qinfo0);
-        TensorType src2 = create_tensor(shape1, dt_in2, 1, qinfo1);
-        TensorType dst = create_tensor(TensorShape::broadcast_shape(shape0, shape1), dt_out, 1, qinfo_out);
+        const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        TensorType src1 = create_tensor(shape0, dt_in1, 1, qinfo0);
+        TensorType src2 = create_tensor(shape1, dt_in2, 1, qinfo1);
+        TensorType dst = create_tensor(out_shape, dt_out, 1, qinfo_out);
+
+        // Check whether do in-place computation and whether inputs are broadcast compatible
+        TensorType *actual_dst = &dst;
+        if(_is_inplace)
+        {
+            bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (dt_in1 == dt_out);
+            bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (dt_in2 == dt_out);
+            bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+            ARM_COMPUTE_ASSERT(do_in_place);
+
+            if(src1_is_inplace)
+            {
+                actual_dst = &src1;
+            }
+            else
+            {
+                actual_dst = &src2;
+            }
+        }
 
         auto allocate_tensor = [](TensorType & t)
         {
@@ -89,11 +109,12 @@ protected:
 
         // Create and configure function
         FunctionType multiply;
-        multiply.configure(&src1, &src2, (_is_inplace ? &src1 : &dst), scale, convert_policy, rounding_policy, act_info);
+        multiply.configure(&src1, &src2, actual_dst, scale, convert_policy, rounding_policy, act_info);
 
         allocate_tensor(src1);
         allocate_tensor(src2);
 
+        // If don't do in-place computation, still need to allocate original dst
         if(!_is_inplace)
         {
             allocate_tensor(dst);
@@ -106,12 +127,7 @@ protected:
         // Compute function
         multiply.run();
 
-        if(_is_inplace)
-        {
-            return src1;
-        }
-
-        return dst;
+        return std::move(*actual_dst);
     }
 
     SimpleTensor compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out,
@@ -122,16 +138,12 @@ protected:
         SimpleTensor src1{ shape0, dt_in1, 1, qinfo0 };
         SimpleTensor src2{ shape1, dt_in2, 1, qinfo1 };
 
-        // current in-place implementation only supports same metadata of input and output tensors.
-        // By ignoring output quantization information here, we can make test cases implementation much simpler.
-        QuantizationInfo output_qinfo = _is_inplace ? qinfo0 : qinfo_out;
-
         // Fill reference
         fill(src1, 0);
         fill(src2, 1);
 
-        auto result = reference::pixel_wise_multiplication(src1, src2, scale, convert_policy, rounding_policy, dt_out, output_qinfo);
-        return act_info.enabled() ? reference::activation_layer(result, act_info, output_qinfo) : result;
+        auto result = reference::pixel_wise_multiplication(src1, src2, scale, convert_policy, rounding_policy, dt_out, qinfo_out);
+        return act_info.enabled() ? reference::activation_layer(result, act_info, qinfo_out) : result;
     }
 
     TensorType _target{};
-- 
cgit v1.2.1