From 93581a524a8e66ed29ace892bc5cb297287802af Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Thu, 21 Jul 2022 13:55:27 +0100 Subject: [ONCPUML-970] Fast math mode for fixed format kernels Minor tweaks and test for running fixed format kernels with BF16 operations when specified by the user. Change-Id: Ic8167f67b86b1298da65e46cfebed9f3b86940e4 Signed-off-by: Milos Puzovic Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8000 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/Types.h | 10 +- .../NEON/kernels/arm_gemm/gemm_interleaved.hpp | 2 +- .../operators/internal/CpuGemmAssemblyDispatch.cpp | 9 +- tests/main.cpp | 3 +- tests/validation/NEON/ConvolutionLayer.cpp | 168 +++++++++++++++------ .../validation/fixtures/ConvolutionLayerFixture.h | 35 ++--- 6 files changed, 159 insertions(+), 68 deletions(-) diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 66e1c8ab1f..952c174194 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1987,16 +1987,20 @@ enum class WeightFormat // OHWIoi inline int interleave_by(const WeightFormat wf) { - return ((int)wf >> 8) & 0xFFF; + return (static_cast(wf) >> 8) & 0xFFF; } inline int block_by(const WeightFormat wf) { - return ((int)wf >> 20) & 0xF; + return (static_cast(wf) >> 20) & 0xF; } -inline bool is_fixed_format(const WeightFormat wf) +inline bool is_fixed_format(const WeightFormat &wf) { return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY; } +inline bool is_fixed_format_fast_math(const WeightFormat &wf) +{ + return (static_cast(wf) >> 4) & 0x1; +} /** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */ class WeightsInfo diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index 6e14a68438..470cee1557 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -695,7 +695,7 @@ public: #endif /* Make sure we've been set up correctly. */ - assert(_B_transposed); + assert(FixedFormat || _B_transposed); assert(_working_space); int8_t *working_space_bytes = reinterpret_cast(_working_space); diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index df02d649f8..77da83070b 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -25,6 +25,7 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" #include "src/core/helpers/MemoryHelpers.h" #include "src/core/utils/AssemblyUtils.h" #include "src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h" @@ -507,14 +508,20 @@ void Fallback::run(ITensorPack &tensors) const TensorShape tensor_shape = tensor_info->tensor_shape(); const int tensor_height = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)]; const int tensor_width = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)]; - const int tensor_channels = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; + int tensor_channels = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; const int interleave_by = arm_compute::interleave_by(wf); + const int blocked_by = arm_compute::block_by(wf); // We need to find a new stride that is distance from the data for one // set of output channels to the next if(ldb == tensor_channels && multi_stride_b == tensor_channels * tensor_width) { // In this case dimensions that are packed are height, width and channel // so we need to stride it by interleave_by + if(tensor_channels % blocked_by != 0) + { + // We need to pad + tensor_channels = arm_gemm::iceildiv(tensor_channels, blocked_by) * blocked_by; + } ldb = interleave_by * tensor_height * tensor_width * tensor_channels; } else if(multi_stride_b == 0 || (ldb == tensor_width && multi_stride_b == tensor_height * tensor_width)) diff --git a/tests/main.cpp b/tests/main.cpp index bc264de378..ba18339e50 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -240,6 +240,7 @@ int main(int argc, char **argv) p->print_entry("cpu_has_fp16", support::cpp11::to_string(cpu_info.has_fp16())); p->print_entry("cpu_has_bf16", support::cpp11::to_string(cpu_info.has_bf16())); p->print_entry("cpu_has_dotprod", support::cpp11::to_string(cpu_info.has_dotprod())); + p->print_entry("cpu_has_svebf16", support::cpp11::to_string(cpu_info.has_svebf16())); for(unsigned int j = 0; j < num_cpus; ++j) { diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 0194220e1a..67f7c8896f 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -509,23 +509,45 @@ TEST_SUITE(VariableWeightUtils) // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it. -FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixture, framework::DatasetMode::ALL, +template +using HasOptImplFixtureNoFastMath = HasOptImplFixture; + +template +using HasOptImplFixtureFastMath = HasOptImplFixture; + +// UC2_1 + +FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} +FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) { ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); } -FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixture, framework::DatasetMode::ALL, + +FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) { ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); } -// UC2_1_* tests: the user requests a specific fixed format, and a +FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +// UC2_2_* tests: the user requests a specific fixed format, and a // kernel that support that fixed format is found. -FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 }))) { @@ -533,7 +555,7 @@ FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 }))) { @@ -541,19 +563,49 @@ FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixture, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); +} + // UC3_1_* tests: the user queries for ANY fixed format, but there is // no kernel that support the use case specified by the user (for // example, there is no fixed format kernel for the datatype of the // problem). -FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::S32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); } -FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::S32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { @@ -570,7 +622,7 @@ FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { @@ -579,7 +631,7 @@ FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixture, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) { @@ -587,6 +639,26 @@ FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixture, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); +} + namespace { using TestCaseType = std::tuple; @@ -669,7 +741,7 @@ TEST_SUITE_END() // VariableWeightUtils TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures) template -using VarWidth = VariableWeightsFixture; +using VarWidth = VariableWeightsFixture; FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), @@ -689,12 +761,26 @@ FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth, framework::DatasetMode::ALL validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); } +#if defined(ARM_COMPUTE_ENABLE_BF16) +template +using VarWidthFastMath = VariableWeightsFixture; + +FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} +#endif // ARM_COMPUTE_ENABLE_BF16 + TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures) template -using NEGEMMVarWidth = VariableWeightsFixtureNEInterface; +using NEGEMMVarWidth = VariableWeightsFixtureNEInterface; FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), @@ -714,6 +800,20 @@ FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth, framework::Data validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); } +#if defined(ARM_COMPUTE_ENABLE_BF16) +template +using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface; + +FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} +#endif // ARM_COMPUTE_ENABLE_BF16 + TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS @@ -823,9 +923,7 @@ TEST_SUITE(Float) #if defined(ARM_COMPUTE_ENABLE_BF16) TEST_SUITE(BFLOAT16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::BFLOAT16)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { // Validate output @@ -837,10 +935,7 @@ TEST_SUITE_END() // BFLOAT16 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); @@ -850,9 +945,7 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { // Validate output @@ -894,11 +987,8 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - QuantizedActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -924,11 +1014,8 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), - QuantizedActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -1082,10 +1169,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); @@ -1109,11 +1193,8 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - QuantizedActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -1122,11 +1203,8 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), - QuantizedActivationFunctionsDataset)) + framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index c58a0a2c91..63e6dc9377 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -122,14 +122,14 @@ protected: { case DataType::QASYMM8: { - std::pair bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::pair bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); std::uniform_int_distribution distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); break; } case DataType::QASYMM8_SIGNED: { - std::pair bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::pair bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); std::uniform_int_distribution distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); break; @@ -476,7 +476,7 @@ inline void rearrange_data(const AccessorType src, AccessorType dst, const arm_c } } -template +template class VariableWeightsFixtureBaseClass : public framework::Fixture { public: @@ -581,14 +581,14 @@ protected: SimpleTensor _reference{}; }; -template -class VariableWeightsFixture : public VariableWeightsFixtureBaseClass +template +class VariableWeightsFixture : public VariableWeightsFixtureBaseClass { void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info, const PadStrideInfo &conv_info, const Size2D &dilation) { - this->conv->configure(&src_tensor_info, &weight_tensor_info, &bias_tensor_info, &dst_tensor_info, conv_info, weights_info, dilation); + this->conv->configure(&src_tensor_info, &weight_tensor_info, &bias_tensor_info, &dst_tensor_info, conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math); // Allocate input tensors auto src = create_tensor(src_tensor_info); @@ -624,8 +624,8 @@ class VariableWeightsFixture : public VariableWeightsFixtureBaseClass -class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass +template +class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass { void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info, const PadStrideInfo &conv_info, @@ -644,7 +644,7 @@ class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass // Allocate destination tensor this->_target = create_tensor(dst_tensor_info); this->_target.allocator()->allocate(); - this->conv->configure(&src, &weights_transformed, &bias, &(this->_target), conv_info, weights_info, dilation); + this->conv->configure(&src, &weights_transformed, &bias, &(this->_target), conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math); // Prepare source and biases that are left unchanged. this->fill(AccessorType(src), 0); this->fill(AccessorType(bias), 1); @@ -664,7 +664,7 @@ class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass } }; -template +template class HasOptImplFixture : public framework::Fixture { public: @@ -672,14 +672,15 @@ public: void setup(DataType data_type, arm_compute::WeightFormat query_weight_format) { auto conv = std::make_unique(); - const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, data_type, DataLayout::NHWC); - const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, data_type, DataLayout::NHWC); - const auto bias_info = TensorInfo(TensorShape(3U), 1, data_type, DataLayout::NHWC); - auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, data_type, DataLayout::NHWC); - const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); - const WeightsInfo weights_info(false, 3U, 3U, 1U, false, query_weight_format); + const auto src_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(64U), 1, data_type, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); + const auto conv_info = PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR); + const WeightsInfo weights_info(false, 3U, 3U, 64U, false, query_weight_format); _kernel_found = bool(ConvolutionClass::has_opt_impl(_computed_weight_format, &src_info, &weight_info, - &bias_info, &dst_info, conv_info, weights_info)); + &bias_info, &dst_info, conv_info, weights_info, + /*dilation*/ Size2D(1U, 1U), /*act_info*/ ActivationLayerInfo(), enable_fast_math)); } protected: -- cgit v1.2.1