diff options
Diffstat (limited to 'tests/validation/NEON')
-rw-r--r-- | tests/validation/NEON/ConvolutionLayer.cpp | 77 | ||||
-rw-r--r-- | tests/validation/NEON/GEMM.cpp | 145 | ||||
-rw-r--r-- | tests/validation/NEON/GEMMLowp.cpp | 133 | ||||
-rw-r--r-- | tests/validation/NEON/ReorderLayer.cpp | 46 | ||||
-rw-r--r-- | tests/validation/NEON/SoftmaxLayer.cpp | 37 | ||||
-rw-r--r-- | tests/validation/NEON/UNIT/RuntimeContext.cpp | 15 |
6 files changed, 352 insertions, 101 deletions
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 7a9230d37a..d739d4e1a4 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -767,21 +767,33 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath } #if defined(ARM_COMPUTE_ENABLE_BF16) - +// These tests currently only works with SVE length 256 +// If other SVE length is used a kernel will fail to be found +// This needs to be addressed in order to ensure it doesn't revert to FP32 kernels for systems with SVE length other than 256 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } } FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } } #endif // ARM_COMPUTE_ENABLE_BF16 @@ -852,20 +864,36 @@ FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<c combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } } FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { - ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } } #endif // ARM_COMPUTE_ENABLE_BF16 @@ -1141,7 +1169,7 @@ TEST_SUITE(Float) TEST_SUITE(BFLOAT16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::BFLOAT16)), + framework::dataset::make("DataType", Scheduler::get().cpu_info().has_bf16() ? DataType::BFLOAT16 : DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { @@ -1329,6 +1357,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannel // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } + +FIXTURE_DATA_TEST_CASE(MemoryStressLargeChannels, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, + framework::DatasetMode::ALL, + combine( + make("In", TensorShape(1U)), + make("Weights", TensorShape(1U, 1U, 1U, 17000U)), + make("Biases", TensorShape(17000U)), + make("Out", TensorShape(1U, 1U, 17000U)), + make("Info", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("DataLayout", { DataLayout::NHWC }), + make("QuantizationInfo", QuantizationInfo(0.5f, 10)), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index f956cdfeda..5f6a402204 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,8 @@ namespace test { namespace validation { +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ @@ -60,7 +62,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -68,8 +70,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); -const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14); +const auto data_interleave = make("M", 8, 12) * make("N", 8, 12); +const auto data_transpose = make("M", 8, 14) * make("N", 7, 14); /** Zero padding test */ template <typename FunctionType> @@ -204,16 +206,16 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type + make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), }), - framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), + make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), + make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), })), - framework::dataset::make("Expected", { false, true })), + make("Expected", { false, true })), lhs_info, rhs_info, output_info, expected) { constexpr float alpha = 1.0; @@ -226,8 +228,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(KERNEL_SELECTION) DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL, - combine(framework::dataset::make("CpuExt", std::string("NEON")), - framework::dataset::make("DataType", { DataType::F32, + combine(make("CpuExt", std::string("NEON")), + make("DataType", { DataType::F32, DataType::F16 })), cpu_ext, data_type) @@ -261,8 +263,8 @@ TEST_SUITE_END() // KERNEL_SELECTION TEST_SUITE(TRANSPOSE_1XW) using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("N", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("N", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), n_value, k_value) { bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value); @@ -271,7 +273,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( TEST_SUITE(U32) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -280,7 +282,7 @@ TEST_SUITE_END() // U32 TEST_SUITE(U16) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -289,7 +291,7 @@ TEST_SUITE_END() // U16 TEST_SUITE(U8) using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U8)) { // Validate output validate(Accessor(_target), _reference); @@ -302,8 +304,8 @@ TEST_SUITE(INTERLEAVE_4X4) using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("M", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("M", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), m_value, k_value) { bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value); @@ -312,7 +314,7 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( TEST_SUITE(U32) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -321,7 +323,7 @@ TEST_SUITE_END() // U32 TEST_SUITE(U16) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -330,7 +332,7 @@ TEST_SUITE_END() // U16 TEST_SUITE(U8) using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::QASYMM8)) { // Validate output validate(Accessor(_target), _reference); @@ -345,15 +347,18 @@ using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>; template <typename T> using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>; +template <typename T> +using NEGEMMAccumulateFixture = GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, T>; + TEST_SUITE(Float) -DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U), +DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(make("In0", { TensorShape(21U, 13U), TensorShape(31U, 1U), TensorShape(31U, 1U), TensorShape(8U, 2U), TensorShape(38U, 12U), TensorShape(32U, 1U) }), - framework::dataset::make("In1", { TensorShape(33U, 21U), + make("In1", { TensorShape(33U, 21U), TensorShape(23U, 31U), TensorShape(23U, 31U), TensorShape(16U, 8U), @@ -366,75 +371,111 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework:: ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS); } +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + cpu::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - framework::dataset::make("DataType", DataType::F16))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } - -TEST_SUITE(BATCHED_MATMUL) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), - framework::dataset::make("ReshapeWeights", { false })), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } -TEST_SUITE_END() -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F16))) +TEST_SUITE(BATCHED_MATMUL) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + make("ReshapeWeights", { false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } -TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL + +TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } TEST_SUITE(BATCHED_MATMUL) - -TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), - framework::dataset::make("ReshapeWeights", { false })), - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } -TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL -TEST_SUITE_END() +TEST_SUITE(ACCUMULATE) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMAccumulateFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMAccumulateFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +TEST_SUITE_END() // ACCUMULATE -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // FP32 -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // Float +TEST_SUITE_END() // GEMM +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 9c4d1741eb..d25f43a330 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -47,12 +47,24 @@ namespace test { namespace validation { +using framework::dataset::make; + +namespace +{ + constexpr AbsoluteTolerance<float> tolerance_batched(1); + constexpr AbsoluteTolerance<float> tolerance_quant(1); +} // namespace + + TEST_SUITE(NEON) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>; +using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; using framework::dataset::make; @@ -80,6 +92,46 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c validate(b.info()->padding(), PaddingSize()); validate(c.info()->padding(), PaddingSize()); } +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip( + make("In0",{ TensorShape(21U, 1U) }), + make("In1", { TensorShape(1U, 21U) }), + make("Dst", { TensorShape(1U, 1U) }), + make("a_offset", { -2 }), + make("a_offset", { 13 }) + ), + zip( + make("OutputDataType", { DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED}), + make("Expected", { true, false, false }) + )), + shape_a, shape_b, shape_dst, a_offset, b_offset, output_data_type, expected) +{ + DataType input_data_type = (output_data_type == DataType::S32 ? DataType::QASYMM8 : output_data_type); + // Accumulation test for GEMM kernels + TensorInfo a(shape_a, 1, input_data_type, QuantizationInfo(1.0f / 255, a_offset)); + TensorInfo b(shape_b, 1, input_data_type, QuantizationInfo(1.0f / 255, b_offset)); + TensorInfo dst(shape_dst, 1, output_data_type, QuantizationInfo()); + + // Create and configure function + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + if (is_data_type_quantized(output_data_type)) + { + GEMMLowpOutputStageInfo gemmLowpOutputStageInfo = GEMMLowpOutputStageInfo(); + gemmLowpOutputStageInfo.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + + gemm_info.set_gemmlowp_output_stage(gemmLowpOutputStageInfo); + } + + cpu::CpuGemmLowpMatrixMultiplyCore gemmlowp_mm; + Status status = gemmlowp_mm.validate(&a, &b, nullptr, &dst, gemm_info); + + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} +#endif // __arch64__ // *INDENT-OFF* // clang-format off @@ -226,13 +278,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework: validate(Accessor(_target), _reference); } -constexpr AbsoluteTolerance<float> tolerance_batched(1); - -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; - TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), make("DataType", { DataType::QASYMM8 }), @@ -242,9 +291,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFi } TEST_SUITE_END() // QASYMM8 -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = - GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; TEST_SUITE(QASYMM8_SIGNED) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), make("DataType", { DataType::QASYMM8_SIGNED }), @@ -255,26 +304,84 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFi TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // BatchedMatMul -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; -constexpr AbsoluteTolerance<float> tolerance_quant(1); - TEST_SUITE(FusedOffsetOutput) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - make("DataType", { DataType::QASYMM8 }))) + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } - FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - make("DataType", { DataType::QASYMM8 }))) + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } TEST_SUITE_END() // FusedOffsetOutput + +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +TEST_SUITE(ACCUMULATION) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // S32 +TEST_SUITE_END() // ACCUMULATION +#endif // __arch64__ + +TEST_SUITE(DynamicQuantization) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // DynamicQuantization + +#ifdef __aarch64__ +// Deqaunt tests involve returning F32 from the MatrixMultiplyCore kernels and is only implemented in aarch64 +TEST_SUITE(Dequant) +constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallGEMMLowpDataset(), + make("accumulate", {true, false}) + )) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_dequantized); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeGEMMLowpDataset(), + make("accumulate", {false}) + )) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_dequantized); +} +TEST_SUITE_END() // Dequant +#endif // __aarch64__ + TEST_SUITE_END() // MatrixMultiplyCore TEST_SUITE_END() // GEMMLowp TEST_SUITE_END() // NEON diff --git a/tests/validation/NEON/ReorderLayer.cpp b/tests/validation/NEON/ReorderLayer.cpp index 42fa0f8b00..839ad0ac92 100644 --- a/tests/validation/NEON/ReorderLayer.cpp +++ b/tests/validation/NEON/ReorderLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ReorderFixture.h" #include "src/core/NEON/kernels/NEReorderKernel.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" namespace arm_compute { @@ -40,6 +41,8 @@ namespace test { namespace validation { +using framework::dataset::make; + TEST_SUITE(NEON) TEST_SUITE(ReorderLayer) @@ -48,13 +51,46 @@ using NEReorderLayerAlias = ReorderValidationFixture<Tensor, Accessor, NEReorder TEST_SUITE(FP32) #if defined(ARM_COMPUTE_ENABLE_SVE) -FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), framework::dataset::make("DataType", DataType::F32))) +DATA_TEST_CASE(ValidateReorderOHWIo8, framework::DatasetMode::ALL, combine( + zip( + make("InShape",{ TensorShape(10U, 9U), TensorShape(234U, 301U) }), + make("OutShape", { TensorShape(10U, 16U), TensorShape(234U, 304U) }) + ), + zip( + make("InputWeightFormat", {WeightFormat::OHWI}), + make("OutputWeightFormat", {WeightFormat::OHWIo8}) + )), + input_shape, output_shape, input_wf, output_wf) +{ + if(Scheduler::get().cpu_info().has_sve()){ + arm_compute::NEReorderLayer reorder_layer; + int vector_length = arm_gemm::utils::get_vector_length<float>(); + bool expected_bool_status = false; + if (vector_length == 8) + { + expected_bool_status = true; + } + + TensorInfo input_tensor_info(input_shape, 1, DataType::F32); + TensorInfo output_tensor_info(output_shape, 1, DataType::F32); + + Status status = reorder_layer.validate(&input_tensor_info, &output_tensor_info, input_wf, output_wf); + + ARM_COMPUTE_EXPECT((expected_bool_status == bool(status)), framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), make("DataType", DataType::F32))) { // Validate output - validate(Accessor(_target), _reference); + if (_hardware_supports) + { + validate(Accessor(_target), _reference); + } } #endif // ARM_COMPUTE_ENABLE_SVE -FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), framework::dataset::make("DataType", DataType::F32))) + +FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference); @@ -68,4 +104,4 @@ TEST_SUITE_END() // NEON } // namespace test } // namespace arm_compute -#endif // defined(__aarch64__)
\ No newline at end of file +#endif // defined(__aarch64__) diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 2397d81547..94d0866c38 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -122,40 +122,35 @@ template <typename T> using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>; DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, - concat(concat( + concat( combine( - make("CpuExt", std::string("NEON")), + make("CpuExt", std::string("neon")), make("DataType", { DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED}) ), combine( - make("CpuExt", std::string("SVE")), + make("CpuExt", std::string("sme2")), make("DataType", { DataType::F32, DataType::F16})) ), - combine( - make("CpuExt", std::string("SVE2")), - make("DataType", { DataType::QASYMM8, - DataType::QASYMM8_SIGNED})) - ), cpu_ext, data_type) { using namespace cpu::kernels; cpuinfo::CpuIsaInfo cpu_isa{}; - cpu_isa.neon = (cpu_ext == "NEON"); - cpu_isa.sve = (cpu_ext == "SVE"); - cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.neon = (cpu_ext == "neon"); + cpu_isa.sme2 = (cpu_ext == "sme2"); cpu_isa.fp16 = (data_type == DataType::F16); const auto *selected_impl = CpuSoftmaxKernel::get_implementation( - SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */ }, cpu::KernelSelectionType::Preferred); + SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */, CPUInfo::get().get_sme2_vector_length()}, + cpu::KernelSelectionType::Preferred); ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); - std::string expected = "neon_" + cpu_impl_dt(data_type) + "_softmax"; + std::string expected = cpu_ext + "_" + cpu_impl_dt(data_type) + "_softmax"; std::string actual = selected_impl->name; ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); @@ -164,9 +159,19 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, -1 }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine( - datasets::Small4DShapes(), + datasets::SmallShapes(), make("DataType", DataType::F16), make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, 1 }))) @@ -178,7 +183,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::Datas combine( datasets::Small4DShapes(), make("DataType", DataType::F16), - make("Beta", { 1.0f, 2.0f }), + make("Beta", { 1.0f }), make("Axis", { 0, 2, -1 }))) { // Validate output diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp index 819811943d..e0d45c639a 100644 --- a/tests/validation/NEON/UNIT/RuntimeContext.cpp +++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,19 @@ namespace validation { TEST_SUITE(NEON) TEST_SUITE(UNIT) +#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) +TEST_CASE(CpuCapacity, framework::DatasetMode::ALL) +{ + CPUInfo& ci = arm_compute::Scheduler::get().cpu_info(); + const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little(); + const uint32_t num_threads = arm_compute::Scheduler::get().num_threads(); + + ARM_COMPUTE_EXPECT(num_threads<=nonlittle_num_cpus , framework::LogLevel::ERRORS); +} +#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + TEST_SUITE(RuntimeContext) TEST_CASE(Scheduler, framework::DatasetMode::ALL) |