Diffstat (limited to 'tests/validation')
22 files changed, 1071 insertions, 298 deletions
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 1ae9e96626..78d794a9bb 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -71,7 +71,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework: } using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, @@ -84,7 +84,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFi TEST_SUITE_END() // QASYMM8 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), @@ -98,7 +98,7 @@ TEST_SUITE_END() // BatchedMatMul TEST_SUITE(FusedOffsetOutput) TEST_SUITE(QASYMM8) -using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>; +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), make("DataType", { DataType::QASYMM8 }), @@ -110,7 +110,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUi TEST_SUITE(Output3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>; + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), make("DataType", { DataType::QASYMM8 }), @@ -123,7 +123,7 @@ TEST_SUITE_END() // Output3D TEST_SUITE(InputOutput3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>; + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL, 
combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), make("DataType", { DataType::QASYMM8 }), @@ -148,7 +148,8 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t>; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), - make("DataType", { DataType::QASYMM8_SIGNED }))) + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp index 56338f489f..b1531eb64a 100644 --- a/tests/validation/CL/ScatterLayer.cpp +++ b/tests/validation/CL/ScatterLayer.cpp @@ -38,6 +38,12 @@ namespace test { namespace validation { +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */ +} // namespace template <typename T> using CLScatterLayerFixture = ScatterValidationFixture<CLTensor, CLAccessor, CLScatter, T>; @@ -46,69 +52,245 @@ using framework::dataset::make; TEST_SUITE(CL) TEST_SUITE(Scatter) -DATA_TEST_CASE(Validate, framework::DatasetMode::DISABLED, zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::PRECOMMIT, zip( make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(8U), 1, DataType::F32), - TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. - TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. - TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. 
- }), - make("UpdatesInfo",{ TensorInfo(TensorShape(3U), 1, DataType::F16), - TensorInfo(TensorShape(15U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(217U), 1, DataType::F32), - TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - }), - make("IndicesInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U32), - TensorInfo(TensorShape(15U), 1, DataType::U32), - TensorInfo(TensorShape(2U), 1, DataType::U32), - TensorInfo(TensorShape(271U), 1, DataType::U32), - TensorInfo(TensorShape(271U), 1, DataType::U32), - TensorInfo(TensorShape(2U), 1 , DataType::S32) - }), - make("OutputInfo",{ TensorInfo(TensorShape(9U), 1, DataType::F16), - TensorInfo(TensorShape(15U), 1, DataType::F32), - TensorInfo(TensorShape(8U), 1, DataType::F32), - TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(271U), 1, DataType::F32), - TensorInfo(TensorShape(12U), 1, DataType::F32) - }), + TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(15U), 1, DataType::U8), // Valid + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. + TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. + TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1) + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5 + }), + make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), + TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + }), + make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32), + TensorInfo(TensorShape(1U, 4U), 1, DataType::S32), + TensorInfo(TensorShape(3U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(6U, 2U), 1, DataType::S32), + }), + make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(271U), 1, DataType::F32), + TensorInfo(TensorShape(12U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), + }), make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false), - }), - make("Expected", { false, true, true, false, false, false })), + ScatterInfo(ScatterFunction::Max, 
false), + ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Min, false), + ScatterInfo(ScatterFunction::Add, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Update, false), + }), + make("Expected", { false, true, true, true, false, false, false, false, false, false })), input_info, updates_info, indices_info, output_info, scatter_info, expected) { - // TODO: Enable validation tests. - ARM_COMPUTE_UNUSED(input_info); - ARM_COMPUTE_UNUSED(updates_info); - ARM_COMPUTE_UNUSED(indices_info); - ARM_COMPUTE_UNUSED(output_info); - ARM_COMPUTE_UNUSED(scatter_info); - ARM_COMPUTE_UNUSED(expected); + const Status status = CLScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } +const auto allScatterFunctions = make("ScatterFunction", + {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max }); + TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small1DScatterDataset(), - make("DataType", {DataType::F32}), - make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max}), - make("ZeroInit", {false}))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) { - // TODO: Add validate() here. + validate(CLAccessor(_target), _reference, tolerance_f32); } // With this test, src should be passed as nullptr. -FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small1DScatterDataset(), - make("DataType", {DataType::F32}), - make("ScatterFunction", {ScatterFunction::Add}), - make("ZeroInit", {true}))) +FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Add}), + make("ZeroInit", {true}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// Updates/src/dst have same no. dims. 
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiDimDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+1-D to m+n-D cases +FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiIndicesDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }), + make("ZeroInit", {false}), + make("Inplace", {false, true}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterBatchedDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) { - // TODO: Add validate() here + validate(CLAccessor(_target), _reference, tolerance_f32); } + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterScalarDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) // NOTE: Padding not supported in this datset +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + TEST_SUITE_END() // FP32 + + +// NOTE: Padding is disabled for the SmallScatterMixedDataset due certain shapes not supporting padding. +// Padding is well tested in F32 Datatype test cases. 
+ +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::F16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float + +TEST_SUITE(Integer) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S8 + +TEST_SUITE(U32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U32 + +TEST_SUITE(U16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U8 +TEST_SUITE_END() // Integer + TEST_SUITE_END() // Scatter TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/CPP/DFT.cpp b/tests/validation/CPP/DFT.cpp index e19e850589..84431399be 100644 --- a/tests/validation/CPP/DFT.cpp +++ b/tests/validation/CPP/DFT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -125,7 +125,7 @@ DATA_TEST_CASE(Real, framework::DatasetMode::ALL, shapes_2d_dft, auto backward = reference::ridft_2d(forward, is_odd); // Validate with input - validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f)); + validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f), 0.f, AbsoluteTolerance<float>(0.001f)); } DATA_TEST_CASE(Complex, framework::DatasetMode::ALL, shapes_2d_dft, diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 7a9230d37a..d739d4e1a4 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -767,21 +767,33 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath } #if defined(ARM_COMPUTE_ENABLE_BF16) - +// These tests currently only works with SVE length 256 +// If other SVE length is used a kernel will fail to be found +// This needs to be addressed in order to ensure it doesn't revert to FP32 kernels for systems with SVE length other than 256 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } } FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } } #endif // ARM_COMPUTE_ENABLE_BF16 @@ -852,20 +864,36 @@ FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<c combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + 
if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } } FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } } #endif // ARM_COMPUTE_ENABLE_BF16 @@ -1141,7 +1169,7 @@ TEST_SUITE(Float) TEST_SUITE(BFLOAT16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::BFLOAT16)), + framework::dataset::make("DataType", Scheduler::get().cpu_info().has_bf16() ? 
DataType::BFLOAT16 : DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { @@ -1329,6 +1357,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannel // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } + +FIXTURE_DATA_TEST_CASE(MemoryStressLargeChannels, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, + framework::DatasetMode::ALL, + combine( + make("In", TensorShape(1U)), + make("Weights", TensorShape(1U, 1U, 1U, 17000U)), + make("Biases", TensorShape(17000U)), + make("Out", TensorShape(1U, 1U, 17000U)), + make("Info", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("DataLayout", { DataLayout::NHWC }), + make("QuantizationInfo", QuantizationInfo(0.5f, 10)), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index f956cdfeda..5f6a402204 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,8 @@ namespace test { namespace validation { +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ @@ -60,7 +62,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -68,8 +70,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); -const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14); +const auto data_interleave = make("M", 8, 12) * make("N", 8, 12); +const auto data_transpose = make("M", 8, 14) * make("N", 7, 14); /** Zero padding test */ template <typename FunctionType> @@ -204,16 +206,16 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type + make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), }), - framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), + make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), + make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), 
TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), })), - framework::dataset::make("Expected", { false, true })), + make("Expected", { false, true })), lhs_info, rhs_info, output_info, expected) { constexpr float alpha = 1.0; @@ -226,8 +228,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(KERNEL_SELECTION) DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL, - combine(framework::dataset::make("CpuExt", std::string("NEON")), - framework::dataset::make("DataType", { DataType::F32, + combine(make("CpuExt", std::string("NEON")), + make("DataType", { DataType::F32, DataType::F16 })), cpu_ext, data_type) @@ -261,8 +263,8 @@ TEST_SUITE_END() // KERNEL_SELECTION TEST_SUITE(TRANSPOSE_1XW) using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("N", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("N", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), n_value, k_value) { bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value); @@ -271,7 +273,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( TEST_SUITE(U32) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -280,7 +282,7 @@ TEST_SUITE_END() // U32 TEST_SUITE(U16) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -289,7 +291,7 @@ TEST_SUITE_END() // U16 TEST_SUITE(U8) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U8)) { // Validate output validate(Accessor(_target), _reference); @@ -302,8 +304,8 @@ TEST_SUITE(INTERLEAVE_4X4) using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("M", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("M", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), m_value, k_value) { bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value); @@ -312,7 +314,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( 
TEST_SUITE(U32) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -321,7 +323,7 @@ TEST_SUITE_END() // U32 TEST_SUITE(U16) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -330,7 +332,7 @@ TEST_SUITE_END() // U16 TEST_SUITE(U8) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::QASYMM8)) { // Validate output validate(Accessor(_target), _reference); @@ -345,15 +347,18 @@ using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>; template <typename T> using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>; +template <typename T> +using NEGEMMAccumulateFixture = GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, T>; + TEST_SUITE(Float) -DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U), +DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(make("In0", { TensorShape(21U, 13U), TensorShape(31U, 1U), TensorShape(31U, 1U), TensorShape(8U, 2U), TensorShape(38U, 12U), TensorShape(32U, 1U) }), - framework::dataset::make("In1", { TensorShape(33U, 21U), + make("In1", { TensorShape(33U, 21U), TensorShape(23U, 31U), TensorShape(23U, 31U), TensorShape(16U, 8U), @@ -366,75 +371,111 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework:: ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS); } +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + cpu::CpuGemm gemm; + Status 
status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - framework::dataset::make("DataType", DataType::F16))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } - -TEST_SUITE(BATCHED_MATMUL) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), - framework::dataset::make("ReshapeWeights", { false })), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } -TEST_SUITE_END() -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F16))) +TEST_SUITE(BATCHED_MATMUL) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + make("ReshapeWeights", { false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } -TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL + +TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } TEST_SUITE(BATCHED_MATMUL) - -TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), - framework::dataset::make("ReshapeWeights", { false })), - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } -TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL -TEST_SUITE_END() +TEST_SUITE(ACCUMULATE) +FIXTURE_DATA_TEST_CASE(RunSmall, 
NEGEMMAccumulateFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMAccumulateFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +TEST_SUITE_END() // ACCUMULATE -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // FP32 -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // Float +TEST_SUITE_END() // GEMM +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 9c4d1741eb..d25f43a330 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -47,12 +47,24 @@ namespace test { namespace validation { +using framework::dataset::make; + +namespace +{ + constexpr AbsoluteTolerance<float> tolerance_batched(1); + constexpr AbsoluteTolerance<float> tolerance_quant(1); +} // namespace + + TEST_SUITE(NEON) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>; +using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; using framework::dataset::make; @@ -80,6 +92,46 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c validate(b.info()->padding(), PaddingSize()); validate(c.info()->padding(), PaddingSize()); } +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip( + make("In0",{ TensorShape(21U, 1U) }), + make("In1", { TensorShape(1U, 21U) }), + make("Dst", { TensorShape(1U, 1U) }), + make("a_offset", { -2 }), + make("a_offset", { 13 }) + ), + zip( + make("OutputDataType", { DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED}), + make("Expected", { true, false, false }) + )), + shape_a, shape_b, shape_dst, a_offset, b_offset, output_data_type, expected) +{ + DataType input_data_type = (output_data_type == DataType::S32 ? 
DataType::QASYMM8 : output_data_type); + // Accumulation test for GEMM kernels + TensorInfo a(shape_a, 1, input_data_type, QuantizationInfo(1.0f / 255, a_offset)); + TensorInfo b(shape_b, 1, input_data_type, QuantizationInfo(1.0f / 255, b_offset)); + TensorInfo dst(shape_dst, 1, output_data_type, QuantizationInfo()); + + // Create and configure function + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + if (is_data_type_quantized(output_data_type)) + { + GEMMLowpOutputStageInfo gemmLowpOutputStageInfo = GEMMLowpOutputStageInfo(); + gemmLowpOutputStageInfo.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + + gemm_info.set_gemmlowp_output_stage(gemmLowpOutputStageInfo); + } + + cpu::CpuGemmLowpMatrixMultiplyCore gemmlowp_mm; + Status status = gemmlowp_mm.validate(&a, &b, nullptr, &dst, gemm_info); + + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} +#endif // __arch64__ // *INDENT-OFF* // clang-format off @@ -226,13 +278,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework: validate(Accessor(_target), _reference); } -constexpr AbsoluteTolerance<float> tolerance_batched(1); - -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; - TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), make("DataType", { DataType::QASYMM8 }), @@ -242,9 +291,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFi } TEST_SUITE_END() // QASYMM8 -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; TEST_SUITE(QASYMM8_SIGNED) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), make("DataType", { DataType::QASYMM8_SIGNED }), @@ -255,26 +304,84 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFi TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // BatchedMatMul -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; -constexpr AbsoluteTolerance<float> tolerance_quant(1); - TEST_SUITE(FusedOffsetOutput) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - 
make("DataType", { DataType::QASYMM8 }))) + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } - FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - make("DataType", { DataType::QASYMM8 }))) + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } TEST_SUITE_END() // FusedOffsetOutput + +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +TEST_SUITE(ACCUMULATION) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // S32 +TEST_SUITE_END() // ACCUMULATION +#endif // __arch64__ + +TEST_SUITE(DynamicQuantization) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // DynamicQuantization + +#ifdef __aarch64__ +// Deqaunt tests involve returning F32 from the MatrixMultiplyCore kernels and is only implemented in aarch64 +TEST_SUITE(Dequant) +constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallGEMMLowpDataset(), + make("accumulate", {true, false}) + )) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_dequantized); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeGEMMLowpDataset(), + make("accumulate", {false}) + )) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_dequantized); +} +TEST_SUITE_END() // Dequant +#endif // __aarch64__ + TEST_SUITE_END() // MatrixMultiplyCore TEST_SUITE_END() // GEMMLowp TEST_SUITE_END() // NEON diff --git a/tests/validation/NEON/ReorderLayer.cpp b/tests/validation/NEON/ReorderLayer.cpp index 42fa0f8b00..839ad0ac92 100644 --- a/tests/validation/NEON/ReorderLayer.cpp +++ b/tests/validation/NEON/ReorderLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ReorderFixture.h" #include "src/core/NEON/kernels/NEReorderKernel.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" namespace arm_compute { @@ -40,6 +41,8 @@ namespace test { namespace validation { +using framework::dataset::make; + TEST_SUITE(NEON) TEST_SUITE(ReorderLayer) @@ -48,13 +51,46 @@ using NEReorderLayerAlias = ReorderValidationFixture<Tensor, Accessor, NEReorder TEST_SUITE(FP32) #if defined(ARM_COMPUTE_ENABLE_SVE) -FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), framework::dataset::make("DataType", DataType::F32))) +DATA_TEST_CASE(ValidateReorderOHWIo8, framework::DatasetMode::ALL, combine( + zip( + make("InShape",{ TensorShape(10U, 9U), TensorShape(234U, 301U) }), + make("OutShape", { TensorShape(10U, 16U), TensorShape(234U, 304U) }) + ), + zip( + make("InputWeightFormat", {WeightFormat::OHWI}), + make("OutputWeightFormat", {WeightFormat::OHWIo8}) + )), + input_shape, output_shape, input_wf, output_wf) +{ + if(Scheduler::get().cpu_info().has_sve()){ + arm_compute::NEReorderLayer reorder_layer; + int vector_length = arm_gemm::utils::get_vector_length<float>(); + bool expected_bool_status = false; + if (vector_length == 8) + { + expected_bool_status = true; + } + + TensorInfo input_tensor_info(input_shape, 1, DataType::F32); + TensorInfo output_tensor_info(output_shape, 1, DataType::F32); + + Status status = reorder_layer.validate(&input_tensor_info, &output_tensor_info, input_wf, output_wf); + + ARM_COMPUTE_EXPECT((expected_bool_status == bool(status)), framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), make("DataType", DataType::F32))) { // Validate output - validate(Accessor(_target), _reference); + if (_hardware_supports) + { + validate(Accessor(_target), _reference); + } } #endif // ARM_COMPUTE_ENABLE_SVE -FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), framework::dataset::make("DataType", DataType::F32))) + +FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference); @@ -68,4 +104,4 @@ TEST_SUITE_END() // NEON } // namespace test } // namespace arm_compute -#endif // defined(__aarch64__)
\ No newline at end of file +#endif // defined(__aarch64__) diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 2397d81547..94d0866c38 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -122,40 +122,35 @@ template <typename T> using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>; DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, - concat(concat( + concat( combine( - make("CpuExt", std::string("NEON")), + make("CpuExt", std::string("neon")), make("DataType", { DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED}) ), combine( - make("CpuExt", std::string("SVE")), + make("CpuExt", std::string("sme2")), make("DataType", { DataType::F32, DataType::F16})) ), - combine( - make("CpuExt", std::string("SVE2")), - make("DataType", { DataType::QASYMM8, - DataType::QASYMM8_SIGNED})) - ), cpu_ext, data_type) { using namespace cpu::kernels; cpuinfo::CpuIsaInfo cpu_isa{}; - cpu_isa.neon = (cpu_ext == "NEON"); - cpu_isa.sve = (cpu_ext == "SVE"); - cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.neon = (cpu_ext == "neon"); + cpu_isa.sme2 = (cpu_ext == "sme2"); cpu_isa.fp16 = (data_type == DataType::F16); const auto *selected_impl = CpuSoftmaxKernel::get_implementation( - SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */ }, cpu::KernelSelectionType::Preferred); + SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */, CPUInfo::get().get_sme2_vector_length()}, + cpu::KernelSelectionType::Preferred); ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); - std::string expected = "neon_" + cpu_impl_dt(data_type) + "_softmax"; + std::string expected = cpu_ext + "_" + cpu_impl_dt(data_type) + "_softmax"; std::string actual = selected_impl->name; ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); @@ -164,9 +159,19 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, -1 }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine( - datasets::Small4DShapes(), + datasets::SmallShapes(), make("DataType", DataType::F16), make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, 1 }))) @@ -178,7 +183,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::Datas combine( datasets::Small4DShapes(), make("DataType", DataType::F16), - make("Beta", { 1.0f, 2.0f }), + make("Beta", { 1.0f }), make("Axis", { 0, 2, -1 }))) { // Validate output diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp index 819811943d..e0d45c639a 100644 --- a/tests/validation/NEON/UNIT/RuntimeContext.cpp +++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -48,6 +48,19 @@ namespace validation { TEST_SUITE(NEON) TEST_SUITE(UNIT) +#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) +TEST_CASE(CpuCapacity, framework::DatasetMode::ALL) +{ + CPUInfo& ci = arm_compute::Scheduler::get().cpu_info(); + const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little(); + const uint32_t num_threads = arm_compute::Scheduler::get().num_threads(); + + ARM_COMPUTE_EXPECT(num_threads<=nonlittle_num_cpus , framework::LogLevel::ERRORS); +} +#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + TEST_SUITE(RuntimeContext) TEST_CASE(Scheduler, framework::DatasetMode::ALL) diff --git a/tests/validation/UNIT/CPPScheduler.cpp b/tests/validation/UNIT/CPPScheduler.cpp index 52431653b5..6a3f6819fc 100644 --- a/tests/validation/UNIT/CPPScheduler.cpp +++ b/tests/validation/UNIT/CPPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -68,8 +68,7 @@ public: TEST_SUITE(UNIT) TEST_SUITE(CPPScheduler) - -#if !defined(BARE_METAL) +#if defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) TEST_CASE(RethrowException, framework::DatasetMode::ALL) { CPPScheduler scheduler; @@ -87,7 +86,6 @@ TEST_CASE(RethrowException, framework::DatasetMode::ALL) } ARM_COMPUTE_EXPECT_FAIL("Expected exception not caught", framework::LogLevel::ERRORS); } -#endif // !defined(BARE_METAL) - +#endif // defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index afde3d8067..94bedc83e1 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,14 +46,14 @@ namespace test namespace validation { template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> -class GEMMValidationFixture : public framework::Fixture +class GEMMGenericValidationFixture : public framework::Fixture { public: - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false) { ARM_COMPUTE_UNUSED(pretranspose); - _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type); - _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type); + _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); + _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate); } protected: @@ -80,7 +80,7 @@ protected: } TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta, - DataType data_type) + DataType data_type, bool accumulate=false) { // Create tensors TensorType a = create_tensor<TensorType>(shape_a, data_type, 1); @@ -99,7 +99,7 @@ protected: &dst, alpha, beta, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d - || reinterpret_output_as_3d))); + || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate)); ARM_COMPUTE_ASSERT(a.info()->is_resizable()); ARM_COMPUTE_ASSERT(b.info()->is_resizable()); ARM_COMPUTE_ASSERT(c.info()->is_resizable()); @@ -121,11 +121,14 @@ protected: // Fill tensors fill(AccessorType(a), 0); fill(AccessorType(b), 1); + if (accumulate) + { + fill(AccessorType(dst), 6); + } if(!disable_c) { fill(AccessorType(c), 2); } - // Run with variable inputs. if(run_twice) { @@ -145,7 +148,7 @@ protected: } SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta, - DataType data_type) + DataType data_type, bool accumulate=false) { TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) @@ -158,6 +161,7 @@ protected: SimpleTensor<T> a{ shape_a_to_use, data_type, 1 }; SimpleTensor<T> b{ shape_b, data_type, 1 }; SimpleTensor<T> c{ output_shape, data_type, 1 }; + SimpleTensor<T> dst{ output_shape, data_type, 1 }; // Fill reference fill(a, 0); @@ -211,17 +215,51 @@ protected: fill(c, 5); } + // Do in place summation + if (accumulate) + { + fill(dst, 6); + } + // Setting beta to 0 will effectively disable C for the // computation of the reference: alpha * A * B + 0 * C // Use transposed tensors if boolean enabled else use original tensors - auto r = reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 
0.f : beta); - return r; + if (accumulate) + { + reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst); + return dst; + } + else + { + return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta); + } } TensorType _target{}; SimpleTensor<T> _reference{}; }; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> +class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + { + GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> +class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + { + bool accumulate = true; + GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate); + } +}; + template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType> class GEMMMatrixMultiplyValidationFixture : public framework::Fixture { diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index a65a1e6bd8..aa4eedb75d 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -30,6 +30,8 @@ #include "tests/framework/Fixture.h" #include "tests/validation/Validation.h" #include "tests/validation/reference/GEMMLowp.h" +#include "tests/validation/reference/ArithmeticOperations.h" +#include "tests/validation/reference/DequantizationLayer.h" #include <cstdint> #include <vector> @@ -42,20 +44,35 @@ namespace validation { namespace { - template <typename U> void fill(U &&tensor, int i) { + library->fill_tensor_uniform(tensor, i); +} + +template <typename U> +void fill_quantized(U &&tensor, int i) +{ ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type())); library->fill_tensor_uniform(tensor, i); } template <typename U> -void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max) +void fill(U &&tensor, int i, int32_t 
min, int32_t max) { - ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32); - std::uniform_int_distribution<int32_t> distribution(min, max); - library->fill(tensor, distribution, i); + if (tensor.data_type() == DataType::S32) { + std::uniform_int_distribution<int32_t> distribution(min, max); + library->fill(tensor, distribution, i); + } + else if(tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution((float)min, (float)max); + library->fill(tensor, distribution, i); + } + else + { + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } } /** Information about how to fill tensors */ @@ -64,6 +81,11 @@ struct TensorFillInfo // Bias fill range. Default values are arbitrary int32_t min_bias {-20000}; int32_t max_bias {20000}; + + // Output fill range. Default values are arbitrary + int32_t min_output {-20000}; + int32_t max_output {20000}; + // Optional extra hash to randomize tensor filling int32_t hash {0}; }; @@ -71,29 +93,42 @@ struct TensorFillInfo template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false> TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo() ) + GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(), + bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN) { ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); ARM_COMPUTE_ASSERT(data_type_a == data_type_b); - // Create tensors - const DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; + // If unknown, set to sensible defaults + if (data_type_output == DataType::UNKNOWN) { + data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; + } - TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, a_qinfo); - TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated + // Create tensors + TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo); + TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */); TensorType bias; if(is_fused) { TensorShape bias_shape(shape_b[0]); - bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1); + bias = create_tensor<TensorType>(bias_shape,data_type_output == DataType::F32 ? 
DataType::F32 : DataType::S32, 1); } // Create and configure function // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output FunctionType gemmlowp; gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, - output_stage)); + output_stage, false /*fp_mixed_precision*/, false /*fast_math*/, false /*broadcast_bias*/, + arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, + false /* pretranspose_B */, accumulate)); + + // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic) + if (dynamic_qinfo) + { + a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true)); + b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true)); + } ARM_COMPUTE_ASSERT(a.info()->is_resizable()); ARM_COMPUTE_ASSERT(b.info()->is_resizable()); @@ -111,26 +146,32 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors - fill(AccessorType(a), 0 + finfo.hash); - fill(AccessorType(b), 1 + finfo.hash); + fill_quantized(AccessorType(a), 0 + finfo.hash); + fill_quantized(AccessorType(b), 1 + finfo.hash); + + if (accumulate) + { + ARM_COMPUTE_ASSERT(accumulate != run_twice); + fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output); + } if(is_fused) { ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); bias.allocator()->allocate(); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); - fill_bias_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias); + fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias); } // Run with variable inputs. if(run_twice) { gemmlowp.run(); - fill(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run - fill(AccessorType(b), 4 + finfo.hash); + fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run + fill_quantized(AccessorType(b), 4 + finfo.hash); if(is_fused) { - fill_bias_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias); + fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias); } } @@ -168,8 +209,8 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo }; // Fill reference - fill(a, 0 + finfo.hash); - fill(b, 1 + finfo.hash); + fill_quantized(a, 0 + finfo.hash); + fill_quantized(b, 1 + finfo.hash); // Transpose reference if required /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), @@ -189,11 +230,12 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con // Run with variable inputs. const int32_t a_offset = a_qinfo.uniform().offset; const int32_t b_offset = b_qinfo.uniform().offset; + if(run_twice) { reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset); - fill((pretranspose_A) ? a_transposed : a, 3 + finfo.hash); - fill((pretranspose_B) ? b_transposed : b, 4 + finfo.hash); + fill_quantized((pretranspose_A) ? 
a_transposed : a, 3 + finfo.hash); + fill_quantized((pretranspose_B) ? b_transposed : b, 4 + finfo.hash); } return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset); @@ -201,35 +243,77 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con } // namespace template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> -class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture +class GEMMLowpGenericMatrixMultiplyCoreValidationFixture : public framework::Fixture { public: - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate=false, bool dynamic_qinfo = false) { const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset); - _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo); - _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo); + TensorFillInfo finfo; + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate); } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) { const auto output_qinfo = QuantizationInfo(); // No output stage - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo); } - SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo) + SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate) { - return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo); + SimpleTensor<int32_t> ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, + DataType::QASYMM8, DataType::QASYMM8, finfo); + + if (accumulate) + { + SimpleTensor<int32_t> output{ shape_output, DataType::S32, 1 }; + 
fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output); + reference::arithmetic_operation<int32_t>(reference::ArithmeticOperation::ADD, output, ref_output, output, ConvertPolicy::SATURATE); + return output; + } + + return ref_output; } TensorType _target{}; SimpleTensor<int32_t> _reference{}; }; +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + { + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyAccumulateValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + { + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, true /* accumulate */); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + { + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */, true /* dynamic_qinfo */); + } +}; + template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> -class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture : public framework::Fixture +class GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture { public: /** Dynamically initialize the quantization info with saturation awareness @@ -363,16 +447,16 @@ protected: TensorShape bias_shape(shape_b[0]); SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 }; - (run_twice) ? fill_bias_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_bias_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target + (run_twice) ? 
fill(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target switch(output_stage.type) { case GEMMLowpOutputStageType::QUANTIZE_DOWN: - return reference::gemmlowp_quantize_down_scale<int32_t, TW>(output, bias, + return reference::gemmlowp_quantize_down_scale<int32_t, TI>(output, bias, output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TW>(output, bias, + return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TI>(output, bias, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; default: @@ -384,15 +468,71 @@ protected: SimpleTensor<TI> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t> -class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW> +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate) + { + const bool dynamic_qinfo = false; + const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); + const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset); + TensorFillInfo finfo; + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + } + +protected: + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) + { + const auto output_qinfo = QuantizationInfo(); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); + } + + SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo) + { + QuantizationInfo s32_ref_output_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0, dynamic_qinfo); + + SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, 
run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, + DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo); + s32_ref_output.quantization_info(s32_ref_output_quant_info); + + SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32); + f32_ref_output = reference::dequantization_layer<float, int32_t>(s32_ref_output); + + if (accumulate) + { + SimpleTensor<float> output{ shape_output, DataType::F32, 1 }; + fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output); + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, output, f32_ref_output, output, ConvertPolicy::SATURATE); + return output; + } + + return f32_ref_output; + } + + TensorType _target{}; + SimpleTensor<float> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run) + { + GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b, + shape_output, output_stage_type, data_type, reshape_b_only_on_first_run); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> +class GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice> { public: - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run) { - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>::setup(shape_a, shape_b, - shape_output, output_stage_type, data_type, false /* reshape_b_only_on_first_run */); + GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b, shape_output, output_stage_type, data_type, reshape_b_only_on_first_run); } }; diff --git a/tests/validation/fixtures/ReorderFixture.h b/tests/validation/fixtures/ReorderFixture.h index 36e62696bc..8e28484c48 100644 --- a/tests/validation/fixtures/ReorderFixture.h +++ b/tests/validation/fixtures/ReorderFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
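For the new GEMMLowpDequantizedMatrixMultiplyValidationFixture above, the reference path attaches a combined scale of a_qinfo.scale * b_qinfo.scale to the S32 accumulator and dequantizes it to F32, so element-wise the output is simply acc * a_scale * b_scale. A scalar sketch of that relationship with made-up values (not taken from any test dataset):

#include <cstdint>
#include <iostream>

// Scalar view of the fixture's reference dequantization: the int32 GEMMLowp
// accumulator is scaled by the product of the two input scales (the offsets
// are already handled inside the core reference).
float dequantize_acc(int32_t acc, float a_scale, float b_scale)
{
    return static_cast<float>(acc) * (a_scale * b_scale);
}

int main()
{
    const float a_scale = 1.0f / 255; // as in the fixture's a_qinfo
    const float b_scale = 5.0f / 255; // as in the fixture's b_qinfo
    std::cout << dequantize_acc(1300, a_scale, b_scale) << '\n';
    return 0;
}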
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE -#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -32,6 +32,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/Reorder.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" namespace arm_compute { @@ -44,10 +45,23 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReorderValidationFixture : public framework::Fixture { public: + void check_hardware_supports(WeightFormat output_wf){ + if(!Scheduler::get().cpu_info().has_sve() && output_wf!=WeightFormat::OHWIo4){ + _hardware_supports = false; + } + if (Scheduler::get().cpu_info().has_sve() && arm_gemm::utils::get_vector_length<float>() != 8 && output_wf==WeightFormat::OHWIo8) + { + _hardware_supports = false; + } + } + void setup(TensorShape input_shape, TensorShape output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type) { - _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type); - _reference = compute_reference(input_shape, output_shape, output_wf, data_type); + check_hardware_supports(output_wf); + if (_hardware_supports){ + _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type); + _reference = compute_reference(input_shape, output_shape, output_wf, data_type); + } } protected: @@ -98,6 +112,7 @@ public: return reference::reorder_layer<T>(src, output_shape, output_wf); } + bool _hardware_supports = true; TensorType _target{}; SimpleTensor<T> _reference{}; }; @@ -105,4 +120,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h index bda5532a51..af161ef98b 100644 --- a/tests/validation/fixtures/ScatterLayerFixture.h +++ b/tests/validation/fixtures/ScatterLayerFixture.h @@ -27,8 +27,9 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "tests/Globals.h" -#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT +#include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/reference/ScatterLayer.h" #include "tests/SimpleTensor.h" @@ -46,21 +47,46 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScatterGenericValidationFixture : public framework::Fixture { public: - void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape, TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) + void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, bool inplace, bool padding, + QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = 
QuantizationInfo()) { - _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, src_qinfo, o_qinfo); + // this is for improving randomness across tests + _hash = src_shape[0] + src_shape[1] + src_shape[2] + src_shape[3] + src_shape[4] + src_shape[5] + + updates_shape[0] + updates_shape[1] + updates_shape[2] + updates_shape[3] + + updates_shape[4] + updates_shape[5] + + indices_shape[0] + indices_shape[1] + indices_shape[2] + indices_shape[3]; + + _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, inplace, padding, src_qinfo, o_qinfo); _reference = compute_reference(src_shape, updates_shape, indices_shape, out_shape, data_type,scatter_info, src_qinfo , o_qinfo); } protected: template <typename U> - void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + void fill(U &&tensor, int i) { switch(tensor.data_type()) { case DataType::F32: + case DataType::F16: + { + std::uniform_real_distribution<float> distribution(-10.f, 10.f); + library->fill(tensor, distribution, i); + break; + } + case DataType::S32: + case DataType::S16: + case DataType::S8: + { + std::uniform_int_distribution<int32_t> distribution(-100, 100); + library->fill(tensor, distribution, i); + break; + } + case DataType::U32: + case DataType::U16: + case DataType::U8: { - std::uniform_real_distribution<float> distribution(lo, hi); + std::uniform_int_distribution<uint32_t> distribution(0, 200); library->fill(tensor, distribution, i); break; } @@ -71,37 +97,47 @@ protected: } } - // This is used to fill indices tensor with U32 datatype. + // This is used to fill indices tensor with S32 datatype. // Used to prevent ONLY having values that are out of bounds. template <typename U> void fill_indices(U &&tensor, int i, const TensorShape &shape) { - // Calculate max indices the shape should contain. Add an arbitrary constant to allow testing for some out of bounds values. - const uint32_t max = std::max({shape[0] , shape[1], shape[2]}) + 5; - library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(0), static_cast<uint32_t>(max)); + // Calculate max indices the shape should contain. Add an arbitrary value to allow testing for some out of bounds values (In this case min dimension) + const int32_t max = std::min({shape[0] , shape[1], shape[2]}) + 1; + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(0), static_cast<int32_t>(max)); } - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &out_shape, DataType data_type, const ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, + const TensorShape &out_shape, DataType data_type, const ScatterInfo info, bool inplace, bool padding, + QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) { // 1. Create relevant tensors using ScatterInfo data structure. // ---------------------------------------------------- // In order - src, updates, indices, output. 
TensorType src = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo); TensorType updates = create_tensor<TensorType>(shape_b, data_type, 1, a_qinfo); - TensorType indices = create_tensor<TensorType>(shape_c, DataType::U32, 1, QuantizationInfo()); + TensorType indices = create_tensor<TensorType>(shape_c, DataType::S32, 1, QuantizationInfo()); TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, o_qinfo); FunctionType scatter; // Configure operator - // When scatter_info.zero_initialization is true, pass nullptr to scatter function. + // When scatter_info.zero_initialization is true, pass nullptr for src + // because dst does not need to be initialized with src values. if(info.zero_initialization) { scatter.configure(nullptr, &updates, &indices, &dst, info); } else { - scatter.configure(&src, &updates, &indices, &dst, info); + if(inplace) + { + scatter.configure(&src, &updates, &indices, &src, info); + } + else + { + scatter.configure(&src, &updates, &indices, &dst, info); + } } // Assertions @@ -110,51 +146,92 @@ protected: ARM_COMPUTE_ASSERT(indices.info()->is_resizable()); ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + if(padding) + { + add_padding_x({ &src, &updates, &indices}); + + if(!inplace) + { + add_padding_x({ &dst }); + } + } + // Allocate tensors src.allocator()->allocate(); updates.allocator()->allocate(); indices.allocator()->allocate(); - dst.allocator()->allocate(); + + if(!inplace) + { + dst.allocator()->allocate(); + } ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); ARM_COMPUTE_ASSERT(!updates.info()->is_resizable()); ARM_COMPUTE_ASSERT(!indices.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + if(!inplace) + { + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } // Fill update (a) and indices (b) tensors. - fill(AccessorType(src), 0); - fill(AccessorType(updates), 1); - fill_indices(AccessorType(indices), 2, out_shape); + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(updates), 1+ _hash); + fill_indices(AccessorType(indices), 2 + _hash, out_shape); scatter.run(); - return dst; + if(inplace) + { + return src; + } + else + { + return dst; + } } - SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape, const TensorShape &out_shape, DataType data_type, - ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape, + const TensorShape &out_shape, DataType data_type, ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) { // Output Quantization not currently in use - fixture should be extended to support this. ARM_COMPUTE_UNUSED(o_qinfo); + TensorShape src_shape = a_shape; + TensorShape updates_shape = b_shape; + TensorShape indices_shape = c_shape; + const int num_ind_dims = c_shape.num_dimensions(); + + // 1. Collapse batch index into a single dim if necessary for update tensor and indices tensor. + if(num_ind_dims >= 3) + { + indices_shape = indices_shape.collapsed_from(1); + updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - (num_ind_dims -1)); // Collapses batch dims + } + + // 2. Collapse data dims into a single dim. + // Collapse all src dims into 2 dims. First one holding data, the other being the index we iterate over. + src_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse all data dims into single dim. 
+ src_shape = src_shape.collapsed_from(1); // Collapse all index dims into a single dim + updates_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse data dims (all except last dim which is batch dim) // Create reference tensors - SimpleTensor<T> src{ a_shape, data_type, 1, a_qinfo }; - SimpleTensor<T> updates{b_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<uint32_t> indices{ c_shape, DataType::U32, 1, QuantizationInfo() }; + SimpleTensor<T> src{ src_shape, data_type, 1, a_qinfo }; + SimpleTensor<T> updates{updates_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<int32_t> indices{ indices_shape, DataType::S32, 1, QuantizationInfo() }; // Fill reference - fill(src, 0); - fill(updates, 1); - fill_indices(indices, 2, out_shape); - - // Calculate individual reference. - auto result = reference::scatter_layer<T>(src, updates, indices, out_shape, info); + fill(src, 0 + _hash); + fill(updates, 1 + _hash); + fill_indices(indices, 2 + _hash, out_shape); - return result; + // Calculate individual reference using collapsed shapes + return reference::scatter_layer<T>(src, updates, indices, out_shape, info); } TensorType _target{}; SimpleTensor<T> _reference{}; + int32_t _hash{}; }; // This fixture will use the same shape for updates as indices. @@ -162,9 +239,12 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScatterValidationFixture : public ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape, TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init) + void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init, bool inplace, bool padding) { - ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape, indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), QuantizationInfo(), QuantizationInfo()); + ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape, + indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), inplace, padding, + QuantizationInfo(), QuantizationInfo()); } }; diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index 64a89aa6a0..67d69c2c38 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
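The scatter reference path shown above first collapses shapes so that only one data dimension, one index dimension and one batch dimension remain before the reference is called. The small illustration below applies the same TensorShape calls to made-up shapes; only the collapsing pattern mirrors the fixture, the values are arbitrary.

#include "arm_compute/core/TensorShape.h"

#include <iostream>

int main()
{
    using arm_compute::TensorShape;

    // Made-up example: updates = [data, idx, batch0, batch1], indices = [x, batch0, batch1].
    TensorShape updates_shape(8U, 4U, 2U, 3U);
    TensorShape indices_shape(4U, 2U, 3U);
    const int   num_ind_dims = static_cast<int>(indices_shape.num_dimensions());

    // Same pattern as the fixture: collapse the batch dims into a single dim.
    indices_shape = indices_shape.collapsed_from(1);
    updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - (num_ind_dims - 1));

    std::cout << "indices dims: " << indices_shape.num_dimensions()
              << ", updates dims: " << updates_shape.num_dimensions() << '\n';
    return 0;
}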
* * SPDX-License-Identifier: MIT * @@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt) ARM_COMPUTE_UNUSED(dt); return static_cast<TOut>(dequantize_qsymm16(val, qinfo)); } +template <typename TOut> +TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt) +{ + ARM_COMPUTE_UNUSED(dt); + return static_cast<TOut>(dequantize_s32(val, qinfo)); +} } // namespace template <typename TOut, typename TIn> SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src) @@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src); template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src); template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src); +template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp index 20f1139a02..d513343796 100644 --- a/tests/validation/reference/GEMM.cpp +++ b/tests/validation/reference/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021,2024 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" namespace arm_compute { @@ -180,17 +181,22 @@ SimpleTensor<T> gemm_mixed_precision( return dst; } -template SimpleTensor<float> -gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta); -template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, - const SimpleTensor<bfloat16> &b, - const SimpleTensor<bfloat16> &c, - float alpha, - float beta); -template SimpleTensor<half> -gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); -template SimpleTensor<half> gemm_mixed_precision( - const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst) +{ + // Compute reference + SimpleTensor<T> dst_gemm = gemm(a, b, c, alpha, beta); + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + +template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, const SimpleTensor<bfloat16> &b, const SimpleTensor<bfloat16> &c, float alpha, float beta); +template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta); +template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); + +template void gemm_accumulate(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta, SimpleTensor<float> &dst); +template void gemm_accumulate(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta, SimpleTensor<half> &dst); + +template 
SimpleTensor<half> gemm_mixed_precision(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h index 5feaeda584..1b97570122 100644 --- a/tests/validation/reference/GEMM.h +++ b/tests/validation/reference/GEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMM_H -#define ARM_COMPUTE_TEST_GEMM_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMM_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMM_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -41,8 +41,11 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta); +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst); + } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMM_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMM_H diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 1615b51e73..30c577d850 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
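The gemm_accumulate reference introduced above reuses the existing gemm reference and then adds its result onto a pre-filled destination, i.e. dst = dst + alpha * A * B + beta * C. Below is a scalar sketch of that semantics only; it ignores the 3D-reinterpret, transpose and saturation handling done by the real reference and fixtures.

#include <cstddef>
#include <vector>

// Sketch of the accumulate semantics exercised by the new fixtures: the
// destination is pre-filled and the GEMM result is added on top of it.
void gemm_accumulate_sketch(const std::vector<float> &a, const std::vector<float> &b,
                            const std::vector<float> &c, std::vector<float> &dst,
                            std::size_t m, std::size_t n, std::size_t k, float alpha, float beta)
{
    for (std::size_t i = 0; i < m; ++i)
    {
        for (std::size_t j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for (std::size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            dst[i * n + j] += alpha * acc + beta * c[i * n + j]; // accumulate into pre-filled dst
        }
    }
}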
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "GEMMLowp.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" #include "tests/validation/reference/UtilsQuantizedAsymm.h" #include "support/ToolchainSupport.h" @@ -230,6 +231,13 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c return c; } +template <typename T_out, typename T_in, typename T_in_1> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T_out> &dst) +{ + SimpleTensor<T_out> dst_gemm = gemmlowp_matrix_multiply_core<T_out, T_in, T_in_1>(a, b, shape_c, a_offset, b_offset); + reference::arithmetic_operation<T_out>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + // used to validate assembly kernels which don't know anything about offsets template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c) @@ -336,6 +344,8 @@ template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<in std::vector<int32_t> result_shift, int32_t min, int32_t max); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 99015d71fb..6e471fdad1 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H -#define ARM_COMPUTE_TEST_GEMMLOWP_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -38,6 +38,9 @@ namespace reference template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template <typename T1, typename T2, typename T3> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T1> &dst_); + template <typename T1, typename T2, typename T3 = T2> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c); @@ -71,4 +74,4 @@ SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp index 27665375c3..ad7ba7ac43 100644 --- a/tests/validation/reference/QuantizationLayer.cpp +++ b/tests/validation/reference/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp index 920f2b9990..55c48a9002 100644 --- a/tests/validation/reference/ScatterLayer.cpp +++ b/tests/validation/reference/ScatterLayer.cpp @@ -23,6 +23,7 @@ */ #include "ScatterLayer.h" #include "tests/validation/Helpers.h" +#include "arm_compute/core/TensorShape.h" namespace arm_compute { @@ -62,51 +63,89 @@ T reduce_op(const T &current,const T &update,const ScatterFunction func) } template float reduce_op(const float &current,const float &update,const ScatterFunction func); +template half reduce_op(const half &current,const half &update,const ScatterFunction func); } -// Note : This function currently only supports 1D src, 1D updates, 2D indices, 1D output tensors. +// NOTE: This function expects collapsed tensors as input. +// Batch dims for update/indices tensors should be collapsed into a single dim. +// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function. template <typename T> -SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<uint32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) { + // 1. If zero initialization variable is false, copy src data to dst. SimpleTensor<T> dst{ out_shape, src.data_type(), 1 }; - - // 1. If zero initialization variable is true, fill dst with 0 values. Else copy src data to dst. - if(info.zero_initialization) - { - for (int i = 0; i < src.num_elements(); ++i) - { - dst[i] = static_cast<T>(0); - } - } - else + if(!info.zero_initialization) { std::copy_n(src.data(), src.num_elements(), dst.data()); } - // 2. Get max index of output tensor, then iterate over index tensor.
- const auto x_bound = dst.shape().x(); + // Number of elements between each value of the dim being iterated through + const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1); + const unsigned int no_output_dims = out_shape.num_dimensions(); + // Calculate output stride at given index for all output dims. + std::vector<unsigned int> out_stride_at_idx(no_output_dims); + for (unsigned int i = 0 ; i < no_output_dims; i++) + { + out_stride_at_idx[i] = out_shape.total_size_lower(i); + } - for(int i = 0; i < indices.num_elements(); ++i) + const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]); + const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]); + + // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor. + for(unsigned int i = 0; i < indices_y_dim; i++) { - // 3. Check whether index is out of bounds for dst, if not then apply reduce op. - const auto index = indices[i]; - if (index < x_bound) // Note : index is always >= 0 as datatype is unsigned. + // NOTE : Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim + // Starting index for both the update and indices tensors. + const unsigned int update_dim_start = i * data_stride; + const unsigned int indices_dim_start = i * indices_x_dim; + bool out_of_bounds = false; + unsigned int out_offset_acc = 0; + + // Iterate over each indices value for the relevant batch and accumulate the offset. + for(unsigned int j = 0; j < indices_x_dim; j++) + { + // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding k + const int index_value = indices[indices_dim_start + j]; + const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value. 
+ if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0) + { + out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation + } + else + { + out_of_bounds = true; + break; + } + } + + // If not out of bounds, copy update tensor elements to output + if(!out_of_bounds) { - dst[index] = reduce_op(dst[index], updates[i], info.func); + for (unsigned int j = 0 ; j < data_stride; j++) + { + dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func); + } } } return dst; } template <typename T> -SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<uint32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) { return scatter_layer_internal<T>(src, updates, indices, out_shape, info); } -template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<uint32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); - +template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ScatterLayer.h b/tests/validation/reference/ScatterLayer.h index dc441a8894..97d5e70b0d 100644 --- a/tests/validation/reference/ScatterLayer.h +++ b/tests/validation/reference/ScatterLayer.h @@ -37,10 +37,10 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<uint32_t> &indices, const TensorShape &shape, const ScatterInfo &info); +SimpleTensor<T> scatter_layer_internal(const 
SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); template <typename T> -SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<uint32_t> &indices, const TensorShape &shape, const ScatterInfo &info); +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); } // namespace reference } // namespace validation } // namespace test
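Finally, the rewritten scatter reference walks the indices tensor batch by batch and turns each index tuple into a flat output offset: every index value addresses one output dimension (outermost first) and contributes index * stride of that dimension, and any out-of-bounds value causes the whole update row to be skipped. A self-contained sketch of just that offset computation is given below; the function name and the shape/stride containers are made up for illustration.

#include <cstddef>
#include <cstdint>
#include <vector>

// Offset accumulation as performed per indices row by the new scatter reference.
// Returns false when any index is out of bounds, matching the reference's
// behaviour of skipping the corresponding update row.
bool flat_output_offset(const std::vector<int32_t>      &index_tuple,
                        const std::vector<unsigned int> &out_shape,   // per-dimension extents
                        const std::vector<unsigned int> &out_strides, // elements between steps of each dimension
                        std::size_t                     &offset)
{
    offset = 0;
    const std::size_t no_output_dims = out_shape.size();
    for (std::size_t j = 0; j < index_tuple.size(); ++j)
    {
        const std::size_t out_dim = no_output_dims - (j + 1); // outermost output dim first
        const int32_t     value   = index_tuple[j];
        if (value < 0 || value >= static_cast<int32_t>(out_shape[out_dim]))
        {
            return false; // out of bounds: skip this update row
        }
        offset += static_cast<std::size_t>(value) * out_strides[out_dim];
    }
    return true;
}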