diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-01-29 13:24:24 +0000 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-02-01 09:59:30 +0000 |
commit | ae72a46e495742863dba44fcf5fdc673c9d2afbc (patch) | |
tree | 65bab43d0feddaa66b160ac7dc746651dc7c48de /tests | |
parent | ec320d9fc418e2d95a3a38ce87233397535f467d (diff) | |
download | ComputeLibrary-ae72a46e495742863dba44fcf5fdc673c9d2afbc.tar.gz |
Add new operator AddMulAdd for Neon™ backend for Float/Quantized types
This is a fused operator that merges Add + Mul + Add [+ Relu-based-Activation] layers and has an intermediate output after the first Add. It is supported for the FP16/FP32/QASYMM8/QASYMM8_SIGNED data types.
The subsequent Mul and Add are intended for scaling, and their coefficients have only one dimension (per channel).
The inputs are
- input1 : nD tensor [X, Y, Z, W, ..]
- input2 : nD tensor [X, Y, Z, W, ..]
- add_coef : 1D tensor [X]
- mul_coef : 1D tensor [X]
The outputs are
- out1 : nD tensor (intermediate output) [X, Y, Z, W, ..]
- out2 : nD tensor (final output) [X, Y, Z, W, ..]
The operation can be summarized as follows:
out1 <- input1 + input2
out2 <- Act(out1 * mul_coef + add_coef)
The activation function can be Identity, Relu, Bounded Relu or Lower/Upper Bounded Relu. The intermediate output can be skipped by providing a nullptr.
The reason for providing this operator is to make fusion possible for Residual network patterns and to save computation by reducing the number of round trips to and from memory.
Resolves: COMPMID-5463
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I8ef577aa623b036e9a9f655cc088493fd19a6109
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9055
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/validation/NEON/AddMulAdd.cpp | 230 | ||||
-rw-r--r-- | tests/validation/fixtures/AddMulAddFixture.h | 268 |
2 files changed, 498 insertions, 0 deletions
diff --git a/tests/validation/NEON/AddMulAdd.cpp b/tests/validation/NEON/AddMulAdd.cpp new file mode 100644 index 0000000000..f0aba7833b --- /dev/null +++ b/tests/validation/NEON/AddMulAdd.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/AddMulAddFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Absolute tolerance for 32-bit floating point tests */ +const AbsoluteTolerance<half> tolerance_fp16(half(0.1f)); /**< Absolute tolerance for 16-bit floating point tests */ +constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Absolute tolerance for quantized tests */ + +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + + // Boundaries are aligned with Quantized Data ranges -- DOUBLE check before changing + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, -2.f) +}); + +// QASYMM8 test quantizations. Representable ranges below are scale * (q - offset) for q in [0, 255] +const auto qasymm8_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-1, 24.5] +const auto qasymm8_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, 60) }); // Representable Range: [-12, 39] +const auto qasymm8_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.055, 0.2] +const auto qasymm8_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.02, 20) }); // Representable Range: [-0.4, 4.7] + +// Representable Range: [-9.36, 51.84],
Expected F32 range: [-13, 63.5], leaving some space for saturation +const auto qasymm8_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, 39) }); + +// Representable Range: [-4.8, 10.5], Expected FP32 range: [-6.985, 12.7], leaving some space for saturation (NOTE(review): -6.985 equals 63.5 * -0.11; re-derive from the BnMul range above -- confirm) +// This range also makes sense with the activation boundaries above, i.e. [-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU +const auto qasymm8_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, 80) }); + +// QASYMM8_SIGNED test quantizations. Representable ranges below are scale * (q - offset) for q in [-128, 127] +const auto qasymm8_signed_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-13.8, 11.7] +const auto qasymm8_signed_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, -60) }); // Representable Range: [-13.6, 37.4] +const auto qasymm8_signed_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.183, 0.072] +const auto qasymm8_signed_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.4, -120) }); // Representable Range: [-3.2, 98.8] (NOTE(review): previously documented as [-0.32, 9.08], which suggests a smaller scale such as 0.04 may have been intended -- confirm) + +// Representable Range: [-21.36, 39.84], Expected F32 range: [-27.4, 51.1], leaving some space for saturation (NOTE(review): the 51.1 upper bound assumes an Input2 maximum of 39.4; with 37.4 it is 49.1 -- confirm) +const auto qasymm8_signed_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, -39) }); + +// Representable Range: [-4.8, 10.5], Expected FP32 range: [-9.6713, 14.0942], leaving some space for saturation +// This range also makes sense with the activation boundaries above, i.e.
[-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU +const auto qasymm8_signed_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, -48) }); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(AddMulAdd) + +template <typename T> +using NEAddMulAddFloatFixture = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>; /**< Float fixture that requests the intermediate (first Add) output */ + +template <typename T> +using NEAddMulAddFloatFixtureWoIntermOut = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>; /**< Float fixture that does not request the intermediate output */ + +TEST_SUITE(Float) + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has a stricter tolerance, so no tolerance argument is passed + validate(Accessor(_target), _reference, tolerance_fp32); +} + +// This test is to stress the case when there is no intermediate output required (i.e.
nullptr) +FIXTURE_DATA_TEST_CASE(RunSmallWithoutIntermOutput, NEAddMulAddFloatFixtureWoIntermOut<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo() }))) +{ + // Validate the final output only; no intermediate output is produced by this fixture + validate(Accessor(_target), _reference, tolerance_fp32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has a stricter tolerance, so no tolerance argument is passed + validate(Accessor(_target), _reference, tolerance_fp32); +} + +TEST_SUITE_END() // F32 + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has a stricter tolerance, so no tolerance argument is passed + validate(Accessor(_target), _reference, tolerance_fp16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has a stricter tolerance, so no tolerance argument is passed + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // F16 +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +TEST_SUITE_END() // Float + +template <typename T> +using NEAddMulQuantizedFixture = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>; /**< Quantized fixture that requests the intermediate (first Add) output */ +
+template <typename T> +using NEAddMulAddQuantizedFixtureWoIntermOut = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>; /**< Quantized fixture that does not request the intermediate output. NOTE(review): no test case in this file uses this alias */ + +TEST_SUITE(Quantized) + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsDataset), + qasymm8_input1_qinfo_set), + qasymm8_input2_qinfo_set), + qasymm8_bn_mul_qinfo_set), + qasymm8_bn_add_qinfo_set), + qasymm8_add_output_qinfo_set), + qasymm8_final_output_qinfo_set)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsDataset), + qasymm8_input1_qinfo_set), + qasymm8_input2_qinfo_set), + qasymm8_bn_mul_qinfo_set), + qasymm8_bn_add_qinfo_set), + qasymm8_add_output_qinfo_set), + qasymm8_final_output_qinfo_set)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsDataset), + qasymm8_signed_input1_qinfo_set), + qasymm8_signed_input2_qinfo_set), + qasymm8_signed_bn_mul_qinfo_set), + qasymm8_signed_bn_add_qinfo_set), + qasymm8_signed_add_output_qinfo_set), +
qasymm8_signed_final_output_qinfo_set)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsDataset), + qasymm8_signed_input1_qinfo_set), + qasymm8_signed_input2_qinfo_set), + qasymm8_signed_bn_mul_qinfo_set), + qasymm8_signed_bn_add_qinfo_set), + qasymm8_signed_add_output_qinfo_set), + qasymm8_signed_final_output_qinfo_set)) +{ + // Validate both the intermediate and the final output + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() // AddMulAdd +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/AddMulAddFixture.h b/tests/validation/fixtures/AddMulAddFixture.h new file mode 100644 index 0000000000..fac2bfe528 --- /dev/null +++ b/tests/validation/fixtures/AddMulAddFixture.h @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE +#define TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ArithmeticOperations.h" +#include "tests/validation/reference/DequantizationLayer.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" +#include "tests/validation/reference/QuantizationLayer.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class AddMulAddGenericFixture : public framework::Fixture /* Shared base of the AddMulAdd fixtures: fills the inputs, runs FunctionType and stores its outputs; the reference is computed by the derived fixtures */ +{ +public: + template <typename...> + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out) /* Only computes the target; interm_out selects whether the intermediate output is requested */ + { + compute_target(shape, data_type, act_info, interm_out); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType data_type) /* Uniform fill: [-10, 10] for F32, [-1, 1] for F16, the library's default range otherwise; i is forwarded to fill_tensor_uniform */ + { + switch(data_type) + { + case DataType::F32: + library->fill_tensor_uniform(tensor, i, -10.f, 10.f); + break; + case DataType::F16: + library->fill_tensor_uniform(tensor, i, -1.f, 1.f); + break; + default: + library->fill_tensor_uniform(tensor, i); + break; + } + } + + void
compute_target(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out) /* Configures and runs FunctionType, then moves its outputs into _target and (if requested) _interm_target */ + { + TensorShape b_shape(shape.x()); + + // Create tensors (the two coefficient tensors are 1D with length shape.x()) + TensorType input1 = create_tensor<TensorType>(shape, data_type, 1, _input1_qinfo); + TensorType input2 = create_tensor<TensorType>(shape, data_type, 1, _input2_qinfo); + TensorType bn_mul = create_tensor<TensorType>(b_shape, data_type, 1, _bn_mul_qinfo); + TensorType bn_add = create_tensor<TensorType>(b_shape, data_type, 1, _bn_add_qinfo); + TensorType add_output = create_tensor<TensorType>(shape, data_type, 1, _add_output_qinfo); + TensorType final_output = create_tensor<TensorType>(shape, data_type, 1, _final_output_qinfo); + + // Create and configure function (nullptr is passed as the intermediate output when interm_out is false) + FunctionType add_mul_add; + add_mul_add.configure(&input1, &input2, &bn_mul, &bn_add, interm_out ? &add_output : nullptr, &final_output, ConvertPolicy::SATURATE, act_info); + + // Allocate tensors (add_output is only allocated when it is actually used) + input1.allocator()->allocate(); + input2.allocator()->allocate(); + bn_mul.allocator()->allocate(); + bn_add.allocator()->allocate(); + + if(interm_out) + { + add_output.allocator()->allocate(); + } + + final_output.allocator()->allocate(); + + // Fill tensors (indices 0..3 match the ones used when filling the reference inputs) + fill(AccessorType(input1), 0, data_type); + fill(AccessorType(input2), 1, data_type); + fill(AccessorType(bn_mul), 2, data_type); + fill(AccessorType(bn_add), 3, data_type); + + // Compute function + add_mul_add.run(); + + _target = std::move(final_output); + + if(interm_out) + { + _interm_target = std::move(add_output); + } + } + + TensorType _target{}; /**< Final output of the function under test */ + TensorType _interm_target{}; /**< Intermediate output of the function under test; only set when interm_out is true */ + SimpleTensor<T> _reference{}; /**< Reference final output, set by the derived fixtures */ + SimpleTensor<T> _interm_reference{}; /**< Reference intermediate output, set by the derived fixtures when interm_out is true */ + + QuantizationInfo _input1_qinfo{}; + QuantizationInfo _input2_qinfo{}; + QuantizationInfo _bn_mul_qinfo{}; + QuantizationInfo _bn_add_qinfo{}; + QuantizationInfo _add_output_qinfo{}; + QuantizationInfo _final_output_qinfo{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool
interm_out> +class AddMulAddFloatValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T> /* Float fixture: the reference is computed directly in type T */ +{ +public: + using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>; + + template <typename...> + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info) + { + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } + + // Compute Reference is moved outside of the generic fixture because with the quantized data types, + // it becomes a very different implementation with intermediate tensors' data types being always float. + // This way the reference calculations are more readable and the size of the classes will be smaller + // because the fill() and compute_target() methods are not repeated. + void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info) + { + TensorShape b_shape(shape.x()); + + // Create reference tensors + SimpleTensor<T> input1{ shape, data_type }; + SimpleTensor<T> input2{ shape, data_type }; + SimpleTensor<T> bn_mul{ b_shape, data_type }; + SimpleTensor<T> bn_add{ b_shape, data_type }; + SimpleTensor<T> add_output{ shape, data_type, 1 }; + + SimpleTensor<T> bn_mul_out{ shape, data_type }; + SimpleTensor<T> bn_add_out{ shape, data_type }; + + // Fill reference inputs with the same indices as the target inputs + Parent::fill(input1, 0, data_type); + Parent::fill(input2, 1, data_type); + Parent::fill(bn_mul, 2, data_type); + Parent::fill(bn_add, 3, data_type); + + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, input1, input2, add_output, ConvertPolicy::SATURATE); // add_output = input1 + input2 + bn_mul_out = reference::pixel_wise_multiplication<T, T, T>(add_output, bn_mul, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, data_type); // bn_mul_out = add_output * bn_mul + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, bn_mul_out, bn_add, bn_add_out, ConvertPolicy::SATURATE); // bn_add_out = bn_mul_out + bn_add + + if(interm_out) + { + Parent::_interm_reference = std::move(add_output); + } + +
if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY) + { + Parent::_reference = reference::activation_layer(bn_add_out, act_info); + } + else + { + Parent::_reference = std::move(bn_add_out); + } + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool interm_out> +class AddMulAddQuantizedValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T> /* Quantized fixture: the reference is computed in float on dequantized inputs and quantized afterwards */ +{ +public: + using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>; + + template <typename...> + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info, + QuantizationInfo input1_qinfo, QuantizationInfo input2_qinfo, QuantizationInfo bn_mul_qinfo, + QuantizationInfo bn_add_qinfo, QuantizationInfo add_output_qinfo, QuantizationInfo final_output_qinfo) + { + // Quantization arguments moved to class attributes to prevent long function declarations + Parent::_input1_qinfo = input1_qinfo; + Parent::_input2_qinfo = input2_qinfo; + Parent::_bn_mul_qinfo = bn_mul_qinfo; + Parent::_bn_add_qinfo = bn_add_qinfo; + Parent::_add_output_qinfo = add_output_qinfo; + Parent::_final_output_qinfo = final_output_qinfo; + + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } + + // Compute Reference is moved outside of the generic fixture because with the quantized data types, + // it becomes a very different implementation with intermediate tensors' data types being always float. + // This way the reference calculations are more readable and the size of the classes will be smaller + // because the fill() and compute_target() methods are not repeated.
+ void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info) + { + TensorShape b_shape(shape.x()); + + // Create reference inputs carrying the quantization info stored by setup() + SimpleTensor<T> input1{ shape, data_type, 1, Parent::_input1_qinfo }; + SimpleTensor<T> input2{ shape, data_type, 1, Parent::_input2_qinfo }; + SimpleTensor<T> bn_mul{ b_shape, data_type, 1, Parent::_bn_mul_qinfo }; + SimpleTensor<T> bn_add{ b_shape, data_type, 1, Parent::_bn_add_qinfo }; + + // Fill input tensors with the same indices as the target inputs + Parent::fill(input1, 0, data_type); + Parent::fill(input2, 1, data_type); + Parent::fill(bn_mul, 2, data_type); + Parent::fill(bn_add, 3, data_type); + + SimpleTensor<float> input1_dequantized = reference::dequantization_layer<float>(input1); + SimpleTensor<float> input2_dequantized = reference::dequantization_layer<float>(input2); + SimpleTensor<float> bn_mul_dequantized = reference::dequantization_layer<float>(bn_mul); + SimpleTensor<float> bn_add_dequantized = reference::dequantization_layer<float>(bn_add); + + SimpleTensor<float> add_output_dequantized{ shape, DataType::F32 }; + SimpleTensor<float> bn_add_out_dequantized{ shape, DataType::F32 }; + + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, input1_dequantized, input2_dequantized, add_output_dequantized, ConvertPolicy::SATURATE); // float: input1 + input2 + SimpleTensor<float> bn_mul_out_dequantized = reference::pixel_wise_multiplication<float, float, float>(add_output_dequantized, bn_mul_dequantized, 1.f, ConvertPolicy::SATURATE, + RoundingPolicy::TO_NEAREST_UP, DataType::F32); + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, bn_mul_out_dequantized, bn_add_dequantized, bn_add_out_dequantized, ConvertPolicy::SATURATE); // float: (input1 + input2) * bn_mul + bn_add + + if(interm_out) + { + Parent::_interm_reference = reference::quantization_layer<float, T>(add_output_dequantized, data_type, Parent::_add_output_qinfo); + } + + if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY) + { + SimpleTensor<T>
ref = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo); /* The activation is applied to the already quantized tensor */ + Parent::_reference = reference::activation_layer(ref, act_info); + } + else + { + Parent::_reference = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo); + } + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE */ |