From 4a61653202afb018f4f259d3c144a735d73f0a20 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Thu, 4 Jun 2020 15:05:38 +0100
Subject: COMPMID-3480: Perform in-place computations in NEArithmeticAdditionKernel

Change-Id: I0089657dd95d7c7b8592984def8e8de1d7e6d085
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3308
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
---
 .../core/NEON/kernels/NEArithmeticAdditionKernel.h |  20 +--
 .../runtime/NEON/functions/NEArithmeticAddition.h  |  16 +--
 .../NEON/kernels/NEArithmeticAdditionKernel.cpp    | 121 +++++++++------
 src/graph/mutators/InPlaceOperationMutator.cpp     |  19 ++-
 src/runtime/NEON/functions/NEQLSTMLayer.cpp        |   2 +-
 tests/datasets/ShapeDatasets.h                     |  24 ++++
 tests/validation/CL/ArithmeticAddition.cpp         | 154 ++-------------------
 .../validation/GLES_COMPUTE/ArithmeticAddition.cpp |   7 +-
 tests/validation/NEON/ArithmeticAddition.cpp       |  73 +++++++---
 .../fixtures/ArithmeticOperationsFixture.h         |  50 ++++---
 10 files changed, 233 insertions(+), 253 deletions(-)
 mode change 100755 => 100644 tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp

diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index bff34dfda2..50c90b521a 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -68,12 +68,12 @@ public:
      * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
      * - (QSYMM16,QSYMM16) -> QSYMM16
      *
-     * @param[in]  input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[in]  input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
-     * @param[in]  policy Overflow policy.
+     * @param[in, out] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in]      input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[out]     output (Optional) The output tensor. Can be nullptr. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+     * @param[in]      policy Overflow policy.
      */
-    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
+    void configure(ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
      *
      * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
@@ -91,11 +91,11 @@ public:
 private:
     /** Common signature for all the specialised add functions
      *
-     * @param[in]  input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
-     * @param[in]  input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
-     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32.
-     * @param[in]  policy Overflow policy.
-     * @param[in]  window Region on which to execute the kernel.
+     * @param[in, out] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+     * @param[in]      input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+     * @param[out]     output (Optional) The output tensor. Can be nullptr. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32.
+     * @param[in]      policy Overflow policy.
+     * @param[in]      window Region on which to execute the kernel.
      */
    using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window);
    /** Add function to use for the particular tensor types passed to configure() */
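With this change a null output selects the in-place path, so callers can pre-check a planned in-place configuration through the kernel's static validator. A minimal sketch, assuming already-initialised tensor infos (the helper name is illustrative, not from this patch):

    #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"

    using namespace arm_compute;

    // Returns true if the kernel accepts writing a + b back into a's buffer.
    bool in_place_add_supported(const ITensorInfo &a, const ITensorInfo &b)
    {
        // nullptr output requests the in-place configuration introduced above.
        return bool(NEArithmeticAdditionKernel::validate(&a, &b, nullptr, ConvertPolicy::SATURATE));
    }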
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index bf8f2cc14b..5b430ce397 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -25,14 +25,14 @@
 #define ARM_COMPUTE_NEARITHMETICADDITION_H
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to run @ref NEArithmeticAdditionKernel */
-class NEArithmeticAddition : public INESimpleFunction
+class NEArithmeticAddition : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
@@ -51,18 +51,18 @@ public:
      * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
      * - (QSYMM16,QSYMM16) -> QSYMM16
      *
-     * @param[in]  input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[in]  input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[out] output   Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[in]  policy   Policy to use to handle overflow.
-     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in]      input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[out]     output   (Optional) Output tensor. Can be nullptr. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in]      policy   Policy to use to handle overflow.
+     * @param[in]      act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
      */
     void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
      *
     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
     * @param[in] input2   Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
-     * @param[in] output   Output tensor info. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in] output   (Optional) Output tensor info. Can be nullptr. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
     * @param[in] policy   Policy to use to handle overflow
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
     *
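At the function level the same convention applies: passing nullptr as the output makes input1 the destination, which saves one tensor allocation. A usage sketch, assuming both tensors are already allocated and filled (accumulate_into is an illustrative name, not part of the library):

    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // acc <- acc + addend, computed in acc's own buffer.
    void accumulate_into(Tensor &acc, Tensor &addend)
    {
        NEArithmeticAddition add;
        add.configure(&acc, &addend, nullptr, ConvertPolicy::SATURATE);
        add.run();
    }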
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index 3878c764a6..a1c263a836 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -810,95 +810,110 @@ void add_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, Convert
     input1, input2, output);
 }
 
-Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ConvertPolicy policy)
+Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
 {
     ARM_COMPUTE_UNUSED(policy);
-    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                          DataType::S16, DataType::QSYMM16, DataType::F16, DataType::S32, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                          DataType::S16, DataType::QSYMM16, DataType::F16, DataType::S32, DataType::F32);
 
-    const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+    const TensorShape out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
 
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG((input1.tensor_shape().x() != input2.tensor_shape().x()) && ((input1.data_type() != input2.data_type()) || (input1.data_type() != output.data_type())
-                                    || (input2.data_type() != output.data_type())),
-                                    "Broadcasting across width is supported on configurations where all tensors have the same data type");
 
     // Validate in case of configured output
-    if(output.total_size() > 0)
+    if((output != nullptr) && (output->total_size() > 0))
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(
-            !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
-            && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
-            && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
-            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
-            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
-            && !(input1.data_type() == DataType::S32 && input2.data_type() == DataType::S32 && output.data_type() == DataType::S32)
-            && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32 && output.data_type() == DataType::F32)
-            && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16 && output.data_type() == DataType::F16)
-            && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
-            && !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && output.data_type() == DataType::QASYMM8_SIGNED)
-            && !(input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16 && output.data_type() == DataType::QSYMM16),
+            !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
+            && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
+            && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
+            && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
+            && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
+            && !(input1->data_type() == DataType::S32 && input2->data_type() == DataType::S32 && output->data_type() == DataType::S32)
+            && !(input1->data_type() == DataType::F32 && input2->data_type() == DataType::F32 && output->data_type() == DataType::F32)
+            && !(input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16 && output->data_type() == DataType::F16)
+            && !(input1->data_type() == DataType::QASYMM8 && input2->data_type() == DataType::QASYMM8 && output->data_type() == DataType::QASYMM8)
+            && !(input1->data_type() == DataType::QASYMM8_SIGNED && input2->data_type() == DataType::QASYMM8_SIGNED && output->data_type() == DataType::QASYMM8_SIGNED)
+            && !(input1->data_type() == DataType::QSYMM16 && input2->data_type() == DataType::QSYMM16 && output->data_type() == DataType::QSYMM16),
             "You called addition with the wrong image formats");
 
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
                                         "Wrong shape for output");
+
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG((input1->tensor_shape().x() != input2->tensor_shape().x())
+                                        && ((input1->data_type() != input2->data_type()) || (input1->data_type() != output->data_type())
+                                            || (input2->data_type() != output->data_type())),
+                                        "Broadcasting across width is supported on configurations where all tensors have the same data type");
+    }
+    else
+    {
+        // Either auto-initialized output or in-place computation
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG((input1->tensor_shape().x() != input2->tensor_shape().x()) && (input1->data_type() != input2->data_type()),
+                                        "Broadcasting across width is supported on configurations where all tensors have the same data type");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG((output == nullptr) && detail::have_different_dimensions(out_shape, input1->tensor_shape(), 0),
+                                        "In case of in-place computation the broadcast input must be input2");
     }
 
     return Status{};
 }
 
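The asymmetry of the new else-branch is worth spelling out: without a configured output the result must fit in input1, so only input2 may be the broadcast operand. A sketch of both cases through the public validator (headers omitted; the shapes mirror the ValidateInPlace test added later in this patch):

    using namespace arm_compute;

    void broadcast_rules_sketch()
    {
        const TensorInfo full(TensorShape(32U, 13U, 2U), 1, DataType::U8);
        const TensorInfo row(TensorShape(1U, 13U, 2U), 1, DataType::U8);

        // Accepted: input2 broadcasts, input1 already has the result shape.
        const bool ok  = bool(NEArithmeticAddition::validate(&full, &row, nullptr, ConvertPolicy::WRAP));
        // Rejected: the broadcast result would be larger than input1's buffer.
        const bool bad = bool(NEArithmeticAddition::validate(&row, &full, nullptr, ConvertPolicy::WRAP));
        ARM_COMPUTE_UNUSED(ok, bad);
    }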
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
 {
-    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
+    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
     const TensorShape &out_shape    = broadcast_pair.first;
     const ValidRegion &valid_region = broadcast_pair.second;
 
+    ITensorInfo *output_to_use = input1;
+
     // Auto initialize output if not initialized
+    if(output != nullptr)
     {
-        set_shape_if_empty(output, out_shape);
+        set_shape_if_empty(*output, out_shape);
 
-        if(input1.data_type() == DataType::S16 || input2.data_type() == DataType::S16)
+        if(input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16)
         {
-            set_format_if_unknown(output, Format::S16);
+            set_format_if_unknown(*output, Format::S16);
         }
-        if(input1.data_type() == DataType::S32 || input2.data_type() == DataType::S32)
+        if(input1->data_type() == DataType::S32 || input2->data_type() == DataType::S32)
         {
-            set_format_if_unknown(output, Format::S32);
+            set_format_if_unknown(*output, Format::S32);
         }
-        else if(input1.data_type() == DataType::F16 || input2.data_type() == DataType::F16)
+        else if(input1->data_type() == DataType::F16 || input2->data_type() == DataType::F16)
         {
-            set_format_if_unknown(output, Format::F16);
+            set_format_if_unknown(*output, Format::F16);
         }
-        else if(input1.data_type() == DataType::F32 || input2.data_type() == DataType::F32)
+        else if(input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32)
         {
-            set_format_if_unknown(output, Format::F32);
+            set_format_if_unknown(*output, Format::F32);
         }
-        else if(input1.data_type() == DataType::QASYMM8 || input2.data_type() == DataType::QASYMM8)
+        else if(input1->data_type() == DataType::QASYMM8 || input2->data_type() == DataType::QASYMM8)
         {
-            set_data_type_if_unknown(output, DataType::QASYMM8);
+            set_data_type_if_unknown(*output, DataType::QASYMM8);
         }
-        else if(input1.data_type() == DataType::QASYMM8_SIGNED || input2.data_type() == DataType::QASYMM8_SIGNED)
+        else if(input1->data_type() == DataType::QASYMM8_SIGNED || input2->data_type() == DataType::QASYMM8_SIGNED)
         {
-            set_data_type_if_unknown(output, DataType::QASYMM8_SIGNED);
+            set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED);
         }
-        else if(input1.data_type() == DataType::QSYMM16 || input2.data_type() == DataType::QSYMM16)
+        else if(input1->data_type() == DataType::QSYMM16 || input2->data_type() == DataType::QSYMM16)
         {
-            set_data_type_if_unknown(output, DataType::QSYMM16);
+            set_data_type_if_unknown(*output, DataType::QSYMM16);
         }
+
+        output_to_use = output;
     }
 
     Window win = calculate_max_window(valid_region, Steps());
 
     // NEArithmeticAdditionKernel doesn't need padding so update_window_and_padding() can be skipped
     Coordinates coord;
-    coord.set_num_dimensions(output.num_dimensions());
-    output.set_valid_region(valid_region);
+    coord.set_num_dimensions(output_to_use->num_dimensions());
+    output_to_use->set_valid_region(valid_region);
     return std::make_pair(Status{}, win);
 }
 } // namespace
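The output_to_use fallback keeps the existing auto-initialisation behaviour for the out-of-place path: an output whose info is still empty inherits the broadcast shape and a deduced format at configure time. A sketch of that path, assuming F32 inputs that are already allocated and filled (names are mine):

    using namespace arm_compute;

    void configure_with_auto_init(Tensor &a, Tensor &b)
    {
        Tensor dst; // info left empty on purpose

        NEArithmeticAddition add;
        add.configure(&a, &b, &dst, ConvertPolicy::WRAP); // dst info auto-initialised here

        dst.allocator()->allocate(); // safe now: shape and type are known
        add.run();
    }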
@@ -908,13 +923,15 @@ NEArithmeticAdditionKernel::NEArithmeticAdditionKernel()
 {
 }
 
-void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
+void NEArithmeticAdditionKernel::configure(ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(),
+                                                  (output != nullptr) ? output->info() : nullptr,
+                                                  policy));
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window(*input1->info(), *input2->info(), *output->info());
+    auto win_config = validate_and_configure_window(input1->info(), input2->info(), (output != nullptr) ? output->info() : nullptr);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
 
     static std::map<std::string, AddFunction *> map_function =
@@ -947,14 +964,20 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
     _input1 = input1;
     _input2 = input2;
-    _output = output;
+    _output = input1;
     _policy = policy;
 
+    // Out-of-place calculation
+    if(output != nullptr)
+    {
+        _output = output;
+    }
+
     std::string function_to_call("add_");
     function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_";
     function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
     function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
-    function_to_call += string_from_data_type(output->info()->data_type());
+    function_to_call += string_from_data_type(_output->info()->data_type());
 
     auto it = map_function.find(function_to_call);
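Because _output now aliases input1 when no output is given, the lookup key assembled above is identical to the one an out-of-place call with matching types would produce, so the in-place path reuses the existing specialised add functions without adding any new kernels:

    // Trace of the key assembly in configure() for an in-place saturating S16 add:
    //   "add_" + "saturate_" + "S16_" + "S16_" + "S16"  ->  "add_saturate_S16_S16_S16"
    // _output->info()->data_type() resolves to input1's type when running in-place.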
@@ -968,10 +991,8 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
 
 Status NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
-
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output, policy));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(*input1->clone(), *input2->clone(), *output->clone()).first);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, (output != nullptr) ? output : nullptr, policy));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
 
     return Status{};
 }
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index 3b06537cd9..327e985625 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -25,6 +25,7 @@
 
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
 
 namespace arm_compute
 {
@@ -42,13 +43,29 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const
 
 void InPlaceOperationMutator::mutate(Graph &g)
 {
-    std::set<NodeType> in_place_nodes = { NodeType::BatchNormalizationLayer, NodeType::ActivationLayer, NodeType::PrintLayer };
+    std::set<NodeType> in_place_nodes =
+    {
+        NodeType::ActivationLayer,
+        NodeType::BatchNormalizationLayer,
+        NodeType::EltwiseLayer,
+        NodeType::PrintLayer,
+    };
 
     // Not interested in the order of nodes
     for(auto &node : g.nodes())
     {
         if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
         {
+            // Validate node
+            backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
+            Status                    status  = backend.validate_node(*node);
+
+            // If in-place computation is not supported, do nothing and go to next node
+            if(!bool(status))
+            {
+                continue;
+            }
+
             // Get input edge
             Edge *input_edge = node->input_edge(0);
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index a22c669ca7..cd0ecb3a4c 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -191,7 +191,7 @@ void NEQLSTMLayer::configure(const ITensor *input,
         _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
         if(_projection_bias != nullptr)
         {
-            _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
+            _projection_bias_add.configure(&_projection_eff_bias, _projection_bias, nullptr, ConvertPolicy::SATURATE);
         }
     }
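Note that the operand order flips here as well: the in-place contract always writes into input1, so _projection_eff_bias moves to the first slot while the bias becomes the read-only second operand.

    // Before: _projection_bias_add.configure(_projection_bias, &_projection_eff_bias,
    //                                        &_projection_eff_bias, ...);  // output aliased input2
    // After:  _projection_bias_add.configure(&_projection_eff_bias, _projection_bias,
    //                                        nullptr, ...);                // eff_bias += bias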
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index 087342d3b8..0ea8091258 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -241,6 +241,30 @@ public:
     }
 };
 
+/** Data set containing pairs of small tensor shapes that are broadcast compatible for in-place computations.
+ *  When doing in-place computations, only the second input can be broadcast.
+ */
+class SmallShapesBroadcastInPlace final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset>
+{
+public:
+    SmallShapesBroadcastInPlace()
+        : ZipDataset<ShapeDataset, ShapeDataset>(
+              ShapeDataset("Shape0",
+    {
+        TensorShape{ 9U, 9U, 3U, 4U },
+        TensorShape{ 27U, 13U, 2U, 4U },
+        TensorShape{ 1U, 16U, 10U, 2U, 128U },
+    }),
+    ShapeDataset("Shape1",
+    {
+        TensorShape{ 9U, 1U, 3U },
+        TensorShape{ 1U },
+        TensorShape{ 1U, 1U, 1U, 1U, 128U },
+    }))
+    {
+    }
+};
+
 /** Data set containing medium tensor shapes. */
 class MediumShapes final : public ShapeDataset
 {
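The pairs above are constructed so that broadcasting never widens the first shape, which is exactly the invariant the in-place kernel validates. A small check that documents it (fits_in_place is an illustrative name; TensorShape comparison comes from the library's Dimensions operator==):

    using namespace arm_compute;

    // Holds for every (Shape0, Shape1) pair in SmallShapesBroadcastInPlace:
    // the broadcast result never exceeds Shape0, the in-place destination.
    bool fits_in_place(const TensorShape &shape0, const TensorShape &shape1)
    {
        return TensorShape::broadcast_shape(shape0, shape1) == shape0;
    }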
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 41415ee481..229dcbc712 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -108,31 +108,9 @@ using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-               shape, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::U8);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::U8);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::U8);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -140,39 +118,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework
 TEST_SUITE_END() // U8
 
 TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-               shape, data_type, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::S16);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -185,29 +141,6 @@ using CLArithmeticAdditionQuantizedFixture = ArithmeticAdditionValidationQuantiz
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
-               shape, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QASYMM8);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QASYMM8);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::QASYMM8);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
@@ -220,29 +153,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>,
 }
 TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
-               shape, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QASYMM8_SIGNED);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QASYMM8_SIGNED);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::QASYMM8_SIGNED);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8SignedDataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
@@ -255,29 +165,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework
 }
 TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE(QSYMM16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
-               shape, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::QSYMM16);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQSYMM16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
@@ -313,29 +200,6 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>,
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-               shape, policy)
-{
-    // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::F32);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::F32);
-    CLTensor dst      = create_tensor<CLTensor>(shape, DataType::F32);
-
-    // Create and Configure function
-    CLArithmeticAddition add;
-    add.configure(&ref_src1, &ref_src2, &dst, policy);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
-    validate(ref_src1.info()->padding(), padding);
-    validate(ref_src2.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        EmptyActivationFunctionsDataset))
diff --git a/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp b/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp
old mode 100755
new mode 100644
index 82946fa209..336870ac8a
--- a/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp
+++ b/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,8 +56,9 @@ using GCArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<GCTensor, GCAccessor, GCArithmeticAddition, T>;
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(GCAccessor(_target), _reference);
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index 72993172fd..19ebfedcb5 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -104,13 +104,33 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        ConvertPolicy::WRAP);
     ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
 }
+
+DATA_TEST_CASE(ValidateInPlace, framework::DatasetMode::ALL, zip(zip(
+                   framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                            TensorInfo(TensorShape(1U, 13U, 2U), 1, DataType::U8), // Unsupported broadcast on input1 with in-place computation
+                   }),
+                   framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                           TensorInfo(TensorShape(1U, 13U, 2U), 1, DataType::U8),
+                                                           TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                   })),
+                   framework::dataset::make("Expected", { true, true, false })),
+               input1_info, input2_info, expected)
+{
+    Status s = NEArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false),
+                                              &input2_info.clone()->set_is_resizable(false),
+                                              nullptr,
+                                              ConvertPolicy::WRAP);
+    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+}
 // clang-format on
 // *INDENT-ON*
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false, true })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -118,15 +138,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework
 TEST_SUITE_END() // U8
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -134,8 +156,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework
 TEST_SUITE_END() // S16
 
 TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -146,8 +169,9 @@ TEST_SUITE_END() // Integer
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -156,15 +180,17 @@ TEST_SUITE_END() // F16
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -173,17 +199,28 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::
 template <typename T>
 using NEArithmeticAdditionBroadcastFixture = ArithmeticAdditionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticAddition, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ArithmeticAdditionFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastInPlace, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcastInPlace(),
+                       ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { true })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
                        ArithmeticAdditionFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h
index 4a6b0bd3f3..1019e60233 100644
--- a/tests/validation/fixtures/ArithmeticOperationsFixture.h
+++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h
@@ -48,8 +48,10 @@ public:
     template <typename...>
     void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
                DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info,
+               bool in_place)
     {
+        _in_place = in_place;
         _op       = op;
         _act_info = act_info;
         _target   = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
@@ -71,9 +73,11 @@ protected:
         TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
         TensorType dst      = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out);
 
+        TensorType *dst_ptr = _in_place ? nullptr : &dst;
+
         // Create and configure function
         FunctionType arith_op;
-        arith_op.configure(&ref_src1, &ref_src2, &dst, convert_policy, _act_info);
+        arith_op.configure(&ref_src1, &ref_src2, dst_ptr, convert_policy, _act_info);
 
         ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,11 +86,15 @@
         // Allocate tensors
         ref_src1.allocator()->allocate();
         ref_src2.allocator()->allocate();
-        dst.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        if(!_in_place)
+        {
+            dst.allocator()->allocate();
+            ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
 
         // Fill tensors
         fill(AccessorType(ref_src1), 0);
@@ -95,7 +103,14 @@
         // Compute function
         arith_op.run();
 
-        return dst;
+        if(_in_place)
+        {
+            return ref_src1;
+        }
+        else
+        {
+            return dst;
+        }
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1,
@@ -119,6 +134,7 @@ protected:
     SimpleTensor<T>                _reference{};
     reference::ArithmeticOperation _op{ reference::ArithmeticOperation::ADD };
     ActivationLayerInfo            _act_info{};
+    bool                           _in_place{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -126,10 +142,10 @@ class ArithmeticAdditionBroadcastValidationFixture : public ArithmeticOperationG
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1,
-                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo());
+                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
     }
 };
 
@@ -138,10 +154,10 @@ class ArithmeticAdditionValidationFixture : public ArithmeticOperationGenericFix
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
-                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo());
+                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
     }
 };
 
@@ -153,7 +169,7 @@ public:
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1,
-                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
     }
 };
 
@@ -165,7 +181,7 @@ public:
     void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
-                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                            output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
     }
 };
 
@@ -179,7 +195,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1,
-                                                                                            output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo());
+                                                                                            output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
     }
 };
 
@@ -192,7 +208,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1,
                                                                                             data_type0, data_type1, output_data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo());
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
     }
 };
 
@@ -205,7 +221,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1,
                                                                                             data_type0, data_type1, output_data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
     }
 };
 
@@ -218,7 +234,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape,
                                                                                             data_type0, data_type1, output_data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo());
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
     }
 };
 
@@ -231,7 +247,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape,
                                                                                             data_type0, data_type1, output_data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
     }
 };
 
@@ -246,7 +262,7 @@ public:
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type0, data_type1, output_data_type,
-                                                                                            convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo());
+                                                                                            convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
     }
 };
 } // namespace validation
--
cgit v1.2.1