diff options
Diffstat (limited to 'tests')
23 files changed, 1018 insertions, 170 deletions
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 5763938d3c..7085f1facc 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Arm Limited. +# Copyright (c) 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -72,6 +72,7 @@ cc_binary( "NEON/*.h", "validation/NEON/**/*.cpp", "validation/NEON/**/*.h", + "validation/runtime/experimental/**/*.cpp", "*.cpp", "datasets/*.h", "instruments/*.h", diff --git a/tests/SConscript b/tests/SConscript index 0907c5713b..9f8bb54dec 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2023,2024 Arm Limited. +# Copyright (c) 2017-2023, 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -23,6 +23,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import os.path +import SCons Import('env') Import('vars') @@ -156,6 +157,9 @@ if env['neon']: files_validation += Glob(env['external_tests_dir'] + '/tests/validation/NEON/' + filter_pattern) files_validation += Glob('validation/cpu/unit/*.cpp') + # Add wrapper tests + files_validation += Glob('validation/runtime/experimental/*/' + filter_pattern) + extra_link_flags = [] if env['os'] == 'android': test_env.Append(LIBS = ["log"]) @@ -187,7 +191,29 @@ if env['fixed_format_kernels'] and test_env['validation_tests']: test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS']) if test_env['validation_tests']: - arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) + #The following set up only works for posix system, RANLIBCOM env variable isn't available on win32 HOST_OS + if test_env['HOST_OS'] == 'posix': + #Set up to use temp file for long command when building and linking libraries + test_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any command, the following pattern should be used: + # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The commands' string are taken from https://github.com/SCons/scons + #The commands' explanations are taken from Scons userguide + + #The command line used to compile C++ source file to an object files + test_env['CXXCOM'] = "${TEMPFILE('"+ test_env['CXXCOM'] + "')}" + #The command line used to generate a static library from object files + test_env['ARCOM'] = "${TEMPFILE('"+ test_env['ARCOM'] + "')}" + #The command line used to index a static library archive + test_env['RANLIBCOM'] = "${TEMPFILE('"+ test_env['RANLIBCOM'] + "')}" + #The command line used to link object files into an executable + test_env['LINKCOM'] = "${TEMPFILE('"+ test_env['LINKCOM'] + "')}" + #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files + test_env['TEMPFILEDIR'] = test_env['build_dir'] + + arm_compute_validation_framework = test_env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) Depends(arm_compute_validation_framework , arm_compute_test_framework) program_objects = files_validation + common_objects diff --git a/tests/datasets/ScatterDataset.h b/tests/datasets/ScatterDataset.h index f7547ecc94..8fd4448d2d 100644 --- a/tests/datasets/ScatterDataset.h +++ b/tests/datasets/ScatterDataset.h @@ -113,6 +113,8 @@ private: std::vector<TensorShape> _dst_shapes{}; }; + +// 1D dataset for simple scatter tests. class Small1DScatterDataset final : public ScatterDataset { public: @@ -122,6 +124,104 @@ public: add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); } }; + +// This dataset represents the (m+1)-D updates/dst case. +class SmallScatterMultiDimDataset final : public ScatterDataset +{ +public: + SmallScatterMultiDimDataset() + { + // NOTE: Config is src, updates, indices, output. + // - In this config, the dim replaced is the final number (largest tensor dimension) + // - Largest "updates" dim should match y-dim of indices. + // - src/updates/dst should all have same number of dims. Indices should be 2D. + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U), TensorShape(1U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(17U, 3U, 2U, 4U), TensorShape(17U, 3U, 2U, 7U), TensorShape(1U, 7U), TensorShape(17U, 3U, 2U, 4U)); + } +}; + +// This dataset represents the (m+1)-D updates tensor, (m+n)-d output tensor cases +class SmallScatterMultiIndicesDataset final : public ScatterDataset +{ +public: + SmallScatterMultiIndicesDataset() + { + // NOTE: Config is src, updates, indices, output. + // NOTE: indices.shape.x = src.num_dimensions - updates.num_dimensions + 1 + + // index length is 2 + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 4U), TensorShape(2U, 4U), TensorShape(6U, 5U, 2U)); + add_config(TensorShape(17U, 3U, 3U, 2U), TensorShape(17U, 3U, 2U), TensorShape(2U, 2U), TensorShape(17U, 3U, 3U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(5U, 4U, 3U, 3U, 2U, 4U), TensorShape(5U, 4U, 3U, 3U, 5U), TensorShape(2U, 5U), TensorShape(5U, 4U, 3U, 3U, 2U, 4U)); + + // index length is 3 + add_config(TensorShape(4U, 3U, 2U, 2U), TensorShape(4U, 2U), TensorShape(3U, 2U), TensorShape(4U, 3U, 2U, 2U)); + add_config(TensorShape(17U, 4U, 3U, 2U, 2U), TensorShape(17U, 4U, 4U), TensorShape(3U, 4U), TensorShape(17U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 5U, 3U), TensorShape(3U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 4 + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 3U), TensorShape(4U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 5 + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 3U), TensorShape(5U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + } +}; + +// This dataset represents the (m+k)-D updates tensor, (k+1)-d indices tensor and (m+n)-d output tensor cases +class SmallScatterBatchedDataset final : public ScatterDataset +{ +public: + SmallScatterBatchedDataset() + { + // NOTE: Config is src, updates, indices, output. + // NOTE: Updates/Indices tensors are now batched. + // NOTE: indices.shape.x = (updates_batched) ? (src.num_dimensions - updates.num_dimensions) + 2 : (src.num_dimensions - updates.num_dimensions) + 1 + // k is the number of batch dimensions + // k = 2 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U), TensorShape(1U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 6U, 2U), TensorShape(3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 3 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U, 2U), TensorShape(1U, 2U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 3U, 6U, 2U), TensorShape(3U, 3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 4 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 6U, 2U, 3U, 2U), TensorShape(4U, 6U, 2U, 3U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 5 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 3U, 4U, 3U, 2U, 2U), TensorShape(4U, 3U, 4U, 3U, 2U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + } +}; + +class SmallScatterScalarDataset final : public ScatterDataset +{ +public: + // batched scalar case + SmallScatterScalarDataset() + { + add_config(TensorShape(6U, 5U), TensorShape(6U), TensorShape(2U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(3U, 3U, 6U, 5U), TensorShape(6U, 6U), TensorShape(4U, 6U, 6U), TensorShape(3U, 3U, 6U, 5U)); + } +}; + +// This dataset is for data types that does not require full testing. It contains selected tests from the above. +class SmallScatterMixedDataset final : public ScatterDataset +{ +public: + SmallScatterMixedDataset() + { + add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 2U, 2U), TensorShape(2U, 2U, 2U), TensorShape(6U, 5U, 2U)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/framework/SConscript b/tests/framework/SConscript index 450ffd77b0..cca5169099 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2017-2022 Arm Limited. +# Copyright (c) 2017-2022, 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -75,6 +75,26 @@ if not framework_env['mali']: else: framework_env.Append(CPPDEFINES = ['MALI_ENABLED']) +#The following set up only works for posix system, RANLIBCOM env variable isn't available on win32 HOST_OS +if framework_env['HOST_OS'] == 'posix': + #Set up to use temp file for long command when building and linking libraries + framework_env['TEMPFILE'] = SCons.Platform.TempFileMunge + + #To use temp file for any command, the following pattern should be used: + # env['COMMAND'] = "{$TEMPFILE('$COMMANDSTRING')}" + #See: https://github.com/SCons/scons/blob/05f2992377844bbfec9bcd4a9c7f5479c634b91b/SCons/Platform/__init__.py#L147 + #The commands' string are taken from https://github.com/SCons/scons + #The commands' explanations are taken from Scons userguide + + #The command line used to compile C++ source file to an object file + framework_env['CXXCOM'] = "${TEMPFILE('"+ framework_env['CXXCOM'] + "')}" + #The command line used to generate a static library from object files + framework_env['ARCOM'] = "${TEMPFILE('"+ framework_env['ARCOM'] + "')}" + #The command line used to index a static library archive + framework_env['RANLIBCOM'] = "${TEMPFILE('"+ framework_env['RANLIBCOM'] + "')}" + #Set up directory for temp files. To prevent permission issue, the temp files are in the same directory with output files + framework_env['TEMPFILEDIR'] = framework_env['build_dir'] + arm_compute_test_framework = framework_env.StaticLibrary('arm_compute_test_framework', files) Default(arm_compute_test_framework) diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp index 9711671841..b1531eb64a 100644 --- a/tests/validation/CL/ScatterLayer.cpp +++ b/tests/validation/CL/ScatterLayer.cpp @@ -41,6 +41,8 @@ namespace validation namespace { RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */ } // namespace template <typename T> @@ -52,67 +54,243 @@ TEST_SUITE(CL) TEST_SUITE(Scatter) DATA_TEST_CASE(Validate, framework::DatasetMode::PRECOMMIT, zip( make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(8U), 1, DataType::F32), - TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. - TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. - TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. - }), - make("UpdatesInfo",{ TensorInfo(TensorShape(3U), 1, DataType::F16), - TensorInfo(TensorShape(15U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(217U), 1, DataType::F32), - TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - }), - make("IndicesInfo",{ TensorInfo(TensorShape(1U, 3U), 1, DataType::S32), - TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), - TensorInfo(TensorShape(1U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), - TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), - TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32) - }), - make("OutputInfo",{ TensorInfo(TensorShape(9U), 1, DataType::F16), - TensorInfo(TensorShape(15U), 1, DataType::F32), - TensorInfo(TensorShape(8U), 1, DataType::F32), - TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(271U), 1, DataType::F32), - TensorInfo(TensorShape(12U), 1, DataType::F32) - }), + TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(15U), 1, DataType::U8), // Valid + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. + TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. + TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1) + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5 + }), + make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), + TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + }), + make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32), + TensorInfo(TensorShape(1U, 4U), 1, DataType::S32), + TensorInfo(TensorShape(3U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(6U, 2U), 1, DataType::S32), + }), + make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(271U), 1, DataType::F32), + TensorInfo(TensorShape(12U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), + }), make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false), ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Max, false), ScatterInfo(ScatterFunction::Min, false), ScatterInfo(ScatterFunction::Add, false), ScatterInfo(ScatterFunction::Update, false), ScatterInfo(ScatterFunction::Sub, false), - }), - make("Expected", { false, true, true, false, false, false })), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Update, false), + }), + make("Expected", { false, true, true, true, false, false, false, false, false, false })), input_info, updates_info, indices_info, output_info, scatter_info, expected) { - const Status status = CLScatter::validate(&input_info.clone()->set_is_resizable(true), &updates_info.clone()->set_is_resizable(true), &indices_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), scatter_info); + const Status status = CLScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info); ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } +const auto allScatterFunctions = make("ScatterFunction", + {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max }); + TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small1DScatterDataset(), - make("DataType", {DataType::F32}), - make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max }), - make("ZeroInit", {false}))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) { validate(CLAccessor(_target), _reference, tolerance_f32); } // With this test, src should be passed as nullptr. -FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small1DScatterDataset(), - make("DataType", {DataType::F32}), - make("ScatterFunction", {ScatterFunction::Add}), - make("ZeroInit", {true}))) +FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Add}), + make("ZeroInit", {true}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// Updates/src/dst have same no. dims. +FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiDimDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) { validate(CLAccessor(_target), _reference, tolerance_f32); } + +// m+1-D to m+n-D cases +FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiIndicesDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }), + make("ZeroInit", {false}), + make("Inplace", {false, true}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterBatchedDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterScalarDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) // NOTE: Padding not supported in this datset +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + TEST_SUITE_END() // FP32 + + +// NOTE: Padding is disabled for the SmallScatterMixedDataset due certain shapes not supporting padding. +// Padding is well tested in F32 Datatype test cases. + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::F16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float + +TEST_SUITE(Integer) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S8 + +TEST_SUITE(U32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U32 + +TEST_SUITE(U16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U8 +TEST_SUITE_END() // Integer + TEST_SUITE_END() // Scatter TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt index 448e96c4f9..b71787db60 100644 --- a/tests/validation/CMakeLists.txt +++ b/tests/validation/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Arm Limited. +# Copyright (c) 2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -142,5 +142,6 @@ if(ENABLE_NEON) NEON/UNIT/DynamicTensor.cpp NEON/UNIT/TensorAllocator.cpp NEON/UNIT/MemoryManager.cpp - NEON/UNIT/RuntimeContext.cpp) + NEON/UNIT/RuntimeContext.cpp + runtime/experimental/operators/CpuGemm.cpp) endif() diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 1f76925d96..7eada81ce5 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -147,6 +147,45 @@ const auto QuantizationData = make("QuantizationInfo", TEST_SUITE(NEON) TEST_SUITE(ConvolutionLayer) +DATA_TEST_CASE(SupportedTypes, framework::DatasetMode::ALL, zip( + make("DataType", { + DataType::F32, + DataType::QASYMM8, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED + }), + make("WeightsDataType", { + DataType::F32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QASYMM8 + }), + make("Expected", + { + true, + true, + true, + false + })), +data_type_const, weights_data_type_const, expected_const) +{ + TensorInfo input_info = TensorInfo(TensorShape(3U, 3U, 1U), 1, data_type_const); + TensorInfo weights_info = TensorInfo(TensorShape(2U, 2U, 1U, 1U), 1, weights_data_type_const); + TensorInfo output_info = TensorInfo(TensorShape(2U, 2U, 1U), 1, data_type_const); + + input_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + weights_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + output_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + + Status status = NEConvolutionLayer::validate( + &input_info, + &weights_info, + nullptr, + &output_info, + PadStrideInfo()); + + ARM_COMPUTE_EXPECT(bool(status) == expected_const, framework::LogLevel::ERRORS); +} // *INDENT-OFF* // clang-format off @@ -257,7 +296,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -303,7 +342,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -580,7 +619,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, frame /// It's enough to run the activations for a single weight/input combination and data type because /// activation function is called on top of the winograd output as a separate operator -/// TODO: Enable after COMPMID-6573 is resolved +/// TODO(COMPMID-6573): Enable after COMPMID-6573 is resolved FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED, combine( make("Input", TensorShape(3U, 3U, 32U)), @@ -1119,7 +1158,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1160,7 +1199,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1251,12 +1290,14 @@ FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, frame TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float -// TODO: COMPMID-6596 Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights) +// TODO(COMPMID-6573): Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights) template <typename T> using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>; template <typename T> using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>; +using NEGEMMConvolutionLayerQuantizedMixedSignFixture = ConvolutionValidationQuantizedMixedTypeFixture<Tensor, Accessor, NEConvolutionLayer, uint8_t, int8_t>; + template <typename T> using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>; @@ -1332,6 +1373,50 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixtur } TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE(QASYMM8_MIXED) +FIXTURE_DATA_TEST_CASE( + RunSmall, + NEGEMMConvolutionLayerQuantizedMixedSignFixture, + framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", {true})), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("WeightsDataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", {DataLayout::NCHW, DataLayout::NHWC})), + framework::dataset::make("QuantizationInfoIfActivationEnabled", +{QuantizationInfo(2.f / 255.f, 10)})), +framework::dataset::make("WeightQuantizationInfoIfActivationEnabled", +{QuantizationInfo(2.f / 255.f, 10)})), +QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE( + RunMixedDataLayout, + NEGEMMConvolutionLayerQuantizedMixedSignFixture, + framework::DatasetMode::ALL, + combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U)), + framework::dataset::make("Bias", TensorShape(2U)), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U)), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + framework::dataset::make("Dilation", Size2D(1, 1)), + framework::dataset::make("ReshapeWeights", {true}), + framework::dataset::make("DataType", DataType::QASYMM8), + framework::dataset::make("WeightsDataType", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataLayout", {DataLayout::NCHW, DataLayout::NHWC}), + framework::dataset::make("QuantizationInfoIfActivationEnabled", {QuantizationInfo(2.f / 255.f, 10)}), + framework::dataset::make("WeightQuantizationInfoIfActivationEnabled", {QuantizationInfo(2.f / 255.f, 10)}), + QuantizedActivationFunctionsDataset) + ) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8_MIXED + TEST_SUITE(QSYMM8_PER_CHANNEL) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), @@ -1357,6 +1442,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannel // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } + +FIXTURE_DATA_TEST_CASE(MemoryStressLargeChannels, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, + framework::DatasetMode::ALL, + combine( + make("In", TensorShape(1U)), + make("Weights", TensorShape(1U, 1U, 1U, 17000U)), + make("Biases", TensorShape(17000U)), + make("Out", TensorShape(1U, 1U, 17000U)), + make("Info", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("DataLayout", { DataLayout::NHWC }), + make("QuantizationInfo", QuantizationInfo(0.5f, 10)), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized @@ -1415,7 +1521,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1455,7 +1561,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 9b1da61ed7..61202ee2b7 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -141,20 +141,23 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), + TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)), // Invalid types }), make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), }), make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 11U), 1, DataType::S32), TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), + TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), }), - make("Expected", { true, false, false, false, true })), + make("Expected", { true, false, false, false, true, false })), a_info, b_info, output_info, expected) { // Lock tensors @@ -359,14 +362,53 @@ TEST_SUITE_END() // DynamicQuantization #ifdef __aarch64__ // Deqaunt tests involve returning F32 from the MatrixMultiplyCore kernels and is only implemented in aarch64 TEST_SUITE(Dequant) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputAInfo", { + TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), + TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)), + TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)), // Invalid types + }), + make("InputBInfo",{ + TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/256, 10)), + TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/256, 10)), + TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + }), + make("OutputInfo",{ + TensorInfo(TensorShape(64U, 32U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 32U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 32U), 1, DataType::F32), + }), + make("Expected", { true, true, false })), + a_info, b_info, output_info, expected) +{ + // Lock tensors + Status status = NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false), + &b_info.clone()->set_is_resizable(false), + nullptr, + &output_info.clone()->set_is_resizable(false)); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallGEMMLowpDataset(), + make("DataTypeA", {DataType::QASYMM8_SIGNED, DataType::QASYMM8}), + make("DataTypeB", DataType::QASYMM8_SIGNED), + make("accumulate", {true, false}) + )) { // Validate output validate(Accessor(_target), _reference, tolerance_dequantized); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeGEMMLowpDataset(), + make("DataTypeA", {DataType::QASYMM8_SIGNED, DataType::QASYMM8}), + make("DataTypeB", DataType::QASYMM8_SIGNED), + make("accumulate", {false}) + )) { // Validate output validate(Accessor(_target), _reference, tolerance_dequantized); diff --git a/tests/validation/NEON/ReorderLayer.cpp b/tests/validation/NEON/ReorderLayer.cpp index 42fa0f8b00..839ad0ac92 100644 --- a/tests/validation/NEON/ReorderLayer.cpp +++ b/tests/validation/NEON/ReorderLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ReorderFixture.h" #include "src/core/NEON/kernels/NEReorderKernel.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" namespace arm_compute { @@ -40,6 +41,8 @@ namespace test { namespace validation { +using framework::dataset::make; + TEST_SUITE(NEON) TEST_SUITE(ReorderLayer) @@ -48,13 +51,46 @@ using NEReorderLayerAlias = ReorderValidationFixture<Tensor, Accessor, NEReorder TEST_SUITE(FP32) #if defined(ARM_COMPUTE_ENABLE_SVE) -FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), framework::dataset::make("DataType", DataType::F32))) +DATA_TEST_CASE(ValidateReorderOHWIo8, framework::DatasetMode::ALL, combine( + zip( + make("InShape",{ TensorShape(10U, 9U), TensorShape(234U, 301U) }), + make("OutShape", { TensorShape(10U, 16U), TensorShape(234U, 304U) }) + ), + zip( + make("InputWeightFormat", {WeightFormat::OHWI}), + make("OutputWeightFormat", {WeightFormat::OHWIo8}) + )), + input_shape, output_shape, input_wf, output_wf) +{ + if(Scheduler::get().cpu_info().has_sve()){ + arm_compute::NEReorderLayer reorder_layer; + int vector_length = arm_gemm::utils::get_vector_length<float>(); + bool expected_bool_status = false; + if (vector_length == 8) + { + expected_bool_status = true; + } + + TensorInfo input_tensor_info(input_shape, 1, DataType::F32); + TensorInfo output_tensor_info(output_shape, 1, DataType::F32); + + Status status = reorder_layer.validate(&input_tensor_info, &output_tensor_info, input_wf, output_wf); + + ARM_COMPUTE_EXPECT((expected_bool_status == bool(status)), framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), make("DataType", DataType::F32))) { // Validate output - validate(Accessor(_target), _reference); + if (_hardware_supports) + { + validate(Accessor(_target), _reference); + } } #endif // ARM_COMPUTE_ENABLE_SVE -FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), framework::dataset::make("DataType", DataType::F32))) + +FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference); @@ -68,4 +104,4 @@ TEST_SUITE_END() // NEON } // namespace test } // namespace arm_compute -#endif // defined(__aarch64__)
\ No newline at end of file +#endif // defined(__aarch64__) diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 8da5a0d953..94d0866c38 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -145,7 +145,7 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, cpu_isa.fp16 = (data_type == DataType::F16); const auto *selected_impl = CpuSoftmaxKernel::get_implementation( - SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */}, + SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */, CPUInfo::get().get_sme2_vector_length()}, cpu::KernelSelectionType::Preferred); ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp index 819811943d..e0d45c639a 100644 --- a/tests/validation/NEON/UNIT/RuntimeContext.cpp +++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,19 @@ namespace validation { TEST_SUITE(NEON) TEST_SUITE(UNIT) +#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) +TEST_CASE(CpuCapacity, framework::DatasetMode::ALL) +{ + CPUInfo& ci = arm_compute::Scheduler::get().cpu_info(); + const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little(); + const uint32_t num_threads = arm_compute::Scheduler::get().num_threads(); + + ARM_COMPUTE_EXPECT(num_threads<=nonlittle_num_cpus , framework::LogLevel::ERRORS); +} +#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + TEST_SUITE(RuntimeContext) TEST_CASE(Scheduler, framework::DatasetMode::ALL) diff --git a/tests/validation/UNIT/CPPScheduler.cpp b/tests/validation/UNIT/CPPScheduler.cpp index 52431653b5..6a3f6819fc 100644 --- a/tests/validation/UNIT/CPPScheduler.cpp +++ b/tests/validation/UNIT/CPPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -68,8 +68,7 @@ public: TEST_SUITE(UNIT) TEST_SUITE(CPPScheduler) - -#if !defined(BARE_METAL) +#if defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) TEST_CASE(RethrowException, framework::DatasetMode::ALL) { CPPScheduler scheduler; @@ -87,7 +86,6 @@ TEST_CASE(RethrowException, framework::DatasetMode::ALL) } ARM_COMPUTE_EXPECT_FAIL("Expected exception not caught", framework::LogLevel::ERRORS); } -#endif // !defined(BARE_METAL) - +#endif // defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 0622e5e6f0..939ac032cd 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -480,6 +480,31 @@ public: }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> +class ConvolutionValidationQuantizedMixedTypeFixture + : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation, + bool reshape_weights, + DataType data_type, + DataType weights_data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info, + ActivationLayerInfo act_info) + { + ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup( + input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, + weights_data_type, data_layout, quantization_info, weight_quantization_info, act_info); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> class ConvolutionValidationQuantizedPerChannelFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> { public: diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h index e80ad2f54f..e27a41a23b 100644 --- a/tests/validation/fixtures/DirectConvolution3DFixture.h +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,7 @@ class DirectConvolution3DValidationGenericFixture : public framework::Fixture { public: using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; + using TAcc = typename std::conditional < std::is_integral<T>::value, int32_t, float >::type; void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout, @@ -150,7 +151,7 @@ protected: fill(bias, 2); } - return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); + return reference::activation_layer(reference::conv3d<T, TBias, TAcc>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); } TensorType _target{}; diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 11a491faa7..7931d8467d 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -31,7 +31,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/reference/GEMMLowp.h" #include "tests/validation/reference/ArithmeticOperations.h" -#include "tests/validation/reference/QuantizationLayer.h" +#include "tests/validation/reference/DequantizationLayer.h" #include <cstdint> #include <vector> @@ -97,8 +97,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN) { ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); - ARM_COMPUTE_ASSERT(data_type_a == data_type_b); - // If unknown, set to sensible defaults + // If unknown, set to sensible defaults if (data_type_output == DataType::UNKNOWN) { data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; } @@ -185,7 +184,6 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo()) { ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); - ARM_COMPUTE_ASSERT(data_type_a == data_type_b); TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) { @@ -472,37 +470,63 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture { public: - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type_a, DataType data_type_b, bool accumulate) { - // Accumulation is supported for Int8/UInt8 only in aarch64 - bool accumulate = true; - // Accumulation is not supported for Int8/UInt8 in aarch32 -#ifdef __arm__ - accumulate = false; -#endif //__arm__ - bool dynamic_qinfo = false; + const bool dynamic_qinfo = false; const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset); TensorFillInfo finfo; - _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); - _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate); + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo, + accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, + finfo, accumulate, dynamic_qinfo); } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, DataType data_type_a, DataType data_type_b, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) { const auto output_qinfo = QuantizationInfo(); - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type_a, data_type_b, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); } - SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate) + SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, DataType data_type_a, DataType data_type_b, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo) { - SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, - DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo); + QuantizationInfo s32_ref_output_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0, dynamic_qinfo); + + SimpleTensor<int32_t> s32_ref_output; + if (data_type_a == DataType::QASYMM8) + { + if (data_type_b == DataType::QASYMM8) + { + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + else + { + ARM_COMPUTE_ERROR_ON(data_type_b != DataType::QASYMM8_SIGNED); + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, int8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + } + else + { + ARM_COMPUTE_ERROR_ON(data_type_a != DataType::QASYMM8_SIGNED); + if (data_type_b == DataType::QASYMM8) + { + ARM_COMPUTE_ERROR("QASYMM8_SIGNED input with QASYMM8 weights not supported"); + } + else + { + ARM_COMPUTE_ERROR_ON(data_type_b != DataType::QASYMM8_SIGNED); + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + } + + s32_ref_output.quantization_info(s32_ref_output_quant_info); SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32); - QuantizationInfo dst_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0); - f32_ref_output = reference::quantization_layer<int32_t, float>(s32_ref_output, DataType::F32, dst_quant_info); + f32_ref_output = reference::dequantization_layer<float, int32_t>(s32_ref_output); if (accumulate) { diff --git a/tests/validation/fixtures/ReorderFixture.h b/tests/validation/fixtures/ReorderFixture.h index 36e62696bc..8e28484c48 100644 --- a/tests/validation/fixtures/ReorderFixture.h +++ b/tests/validation/fixtures/ReorderFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE -#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -32,6 +32,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/Reorder.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" namespace arm_compute { @@ -44,10 +45,23 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReorderValidationFixture : public framework::Fixture { public: + void check_hardware_supports(WeightFormat output_wf){ + if(!Scheduler::get().cpu_info().has_sve() && output_wf!=WeightFormat::OHWIo4){ + _hardware_supports = false; + } + if (Scheduler::get().cpu_info().has_sve() && arm_gemm::utils::get_vector_length<float>() != 8 && output_wf==WeightFormat::OHWIo8) + { + _hardware_supports = false; + } + } + void setup(TensorShape input_shape, TensorShape output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type) { - _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type); - _reference = compute_reference(input_shape, output_shape, output_wf, data_type); + check_hardware_supports(output_wf); + if (_hardware_supports){ + _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type); + _reference = compute_reference(input_shape, output_shape, output_wf, data_type); + } } protected: @@ -98,6 +112,7 @@ public: return reference::reorder_layer<T>(src, output_shape, output_wf); } + bool _hardware_supports = true; TensorType _target{}; SimpleTensor<T> _reference{}; }; @@ -105,4 +120,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h index 451a1e1416..af161ef98b 100644 --- a/tests/validation/fixtures/ScatterLayerFixture.h +++ b/tests/validation/fixtures/ScatterLayerFixture.h @@ -29,6 +29,7 @@ #include "tests/Globals.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/reference/ScatterLayer.h" #include "tests/SimpleTensor.h" @@ -46,21 +47,46 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScatterGenericValidationFixture : public framework::Fixture { public: - void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape, TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) + void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, bool inplace, bool padding, + QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) { - _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, src_qinfo, o_qinfo); + // this is for improving randomness across tests + _hash = src_shape[0] + src_shape[1] + src_shape[2] + src_shape[3] + src_shape[4] + src_shape[5] + + updates_shape[0] + updates_shape[1] + updates_shape[2] + updates_shape[3] + + updates_shape[4] + updates_shape[5] + + indices_shape[0] + indices_shape[1] + indices_shape[2] + indices_shape[3]; + + _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, inplace, padding, src_qinfo, o_qinfo); _reference = compute_reference(src_shape, updates_shape, indices_shape, out_shape, data_type,scatter_info, src_qinfo , o_qinfo); } protected: template <typename U> - void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + void fill(U &&tensor, int i) { switch(tensor.data_type()) { case DataType::F32: + case DataType::F16: + { + std::uniform_real_distribution<float> distribution(-10.f, 10.f); + library->fill(tensor, distribution, i); + break; + } + case DataType::S32: + case DataType::S16: + case DataType::S8: + { + std::uniform_int_distribution<int32_t> distribution(-100, 100); + library->fill(tensor, distribution, i); + break; + } + case DataType::U32: + case DataType::U16: + case DataType::U8: { - std::uniform_real_distribution<float> distribution(lo, hi); + std::uniform_int_distribution<uint32_t> distribution(0, 200); library->fill(tensor, distribution, i); break; } @@ -77,11 +103,13 @@ protected: void fill_indices(U &&tensor, int i, const TensorShape &shape) { // Calculate max indices the shape should contain. Add an arbitrary value to allow testing for some out of bounds values (In this case min dimension) - const int32_t max = std::max({shape[0] , shape[1], shape[2]}); - library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(-2), static_cast<int32_t>(max)); + const int32_t max = std::min({shape[0] , shape[1], shape[2]}) + 1; + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(0), static_cast<int32_t>(max)); } - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &out_shape, DataType data_type, const ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, + const TensorShape &out_shape, DataType data_type, const ScatterInfo info, bool inplace, bool padding, + QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) { // 1. Create relevant tensors using ScatterInfo data structure. // ---------------------------------------------------- @@ -94,14 +122,22 @@ protected: FunctionType scatter; // Configure operator - // When scatter_info.zero_initialization is true, pass nullptr to scatter function. + // When scatter_info.zero_initialization is true, pass nullptr for src + // because dst does not need to be initialized with src values. if(info.zero_initialization) { scatter.configure(nullptr, &updates, &indices, &dst, info); } else { - scatter.configure(&src, &updates, &indices, &dst, info); + if(inplace) + { + scatter.configure(&src, &updates, &indices, &src, info); + } + else + { + scatter.configure(&src, &updates, &indices, &dst, info); + } } // Assertions @@ -110,48 +146,92 @@ protected: ARM_COMPUTE_ASSERT(indices.info()->is_resizable()); ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + if(padding) + { + add_padding_x({ &src, &updates, &indices}); + + if(!inplace) + { + add_padding_x({ &dst }); + } + } + // Allocate tensors src.allocator()->allocate(); updates.allocator()->allocate(); indices.allocator()->allocate(); - dst.allocator()->allocate(); + + if(!inplace) + { + dst.allocator()->allocate(); + } ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); ARM_COMPUTE_ASSERT(!updates.info()->is_resizable()); ARM_COMPUTE_ASSERT(!indices.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + if(!inplace) + { + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } // Fill update (a) and indices (b) tensors. - fill(AccessorType(src), 0); - fill(AccessorType(updates), 1); - fill_indices(AccessorType(indices), 2, out_shape); + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(updates), 1+ _hash); + fill_indices(AccessorType(indices), 2 + _hash, out_shape); scatter.run(); - return dst; + + if(inplace) + { + return src; + } + else + { + return dst; + } } - SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape, const TensorShape &out_shape, DataType data_type, - ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape, + const TensorShape &out_shape, DataType data_type, ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) { // Output Quantization not currently in use - fixture should be extended to support this. ARM_COMPUTE_UNUSED(o_qinfo); + TensorShape src_shape = a_shape; + TensorShape updates_shape = b_shape; + TensorShape indices_shape = c_shape; + const int num_ind_dims = c_shape.num_dimensions(); + + // 1. Collapse batch index into a single dim if necessary for update tensor and indices tensor. + if(num_ind_dims >= 3) + { + indices_shape = indices_shape.collapsed_from(1); + updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - (num_ind_dims -1)); // Collapses batch dims + } + + // 2. Collapse data dims into a single dim. + // Collapse all src dims into 2 dims. First one holding data, the other being the index we iterate over. + src_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse all data dims into single dim. + src_shape = src_shape.collapsed_from(1); // Collapse all index dims into a single dim + updates_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse data dims (all except last dim which is batch dim) // Create reference tensors - SimpleTensor<T> src{ a_shape, data_type, 1, a_qinfo }; - SimpleTensor<T> updates{b_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<int32_t> indices{ c_shape, DataType::S32, 1, QuantizationInfo() }; + SimpleTensor<T> src{ src_shape, data_type, 1, a_qinfo }; + SimpleTensor<T> updates{updates_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<int32_t> indices{ indices_shape, DataType::S32, 1, QuantizationInfo() }; // Fill reference - fill(src, 0); - fill(updates, 1); - fill_indices(indices, 2, out_shape); + fill(src, 0 + _hash); + fill(updates, 1 + _hash); + fill_indices(indices, 2 + _hash, out_shape); - // Calculate individual reference. + // Calculate individual reference using collapsed shapes return reference::scatter_layer<T>(src, updates, indices, out_shape, info); } TensorType _target{}; SimpleTensor<T> _reference{}; + int32_t _hash{}; }; // This fixture will use the same shape for updates as indices. @@ -159,9 +239,12 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScatterValidationFixture : public ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape, TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init) + void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init, bool inplace, bool padding) { - ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape, indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), QuantizationInfo(), QuantizationInfo()); + ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape, + indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), inplace, padding, + QuantizationInfo(), QuantizationInfo()); } }; diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp index e4010a507a..38472a9aec 100644 --- a/tests/validation/reference/Conv3D.cpp +++ b/tests/validation/reference/Conv3D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023 Arm Limited. + * Copyright (c) 2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ inline bool is_valid_pixel(int i, int min, int max) } // Evaluate the weights against an element in a given tensor. -template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > +template < typename T, typename TB, typename TACC, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -73,7 +73,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const unsigned int src_height = src.shape()[height_dim]; const unsigned int src_depth = src.shape()[depth_dim]; - T total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -112,10 +112,10 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const TB *b_ptr = bias.data(); TB bias_value = b_ptr[ch_out]; - return total + bias_value; + return static_cast<T>(total) + bias_value; } -template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > +template < typename T, typename TB, typename TACC, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { @@ -143,7 +143,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c const float multiplier = input_scale * weights_scale / output_scale; arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); - int32_t total(0); + TACC total(0); for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) { const int idx_z = z_start + dilation.depth * weight_d; @@ -189,7 +189,7 @@ T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, c } } // namespace -template <typename T, typename TB> +template <typename T, typename TB, typename TACC = T> SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info) { // Compute reference @@ -237,7 +237,7 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight T *out_ptr = dst.data(); const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); - out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); + out_ptr[out_offset] = calculate_conv3d<T, TB, TACC>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); } } } @@ -246,13 +246,13 @@ SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weight return dst; } -template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, +template SimpleTensor<float> conv3d<float, float, float>(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, +template SimpleTensor<half> conv3d<half, half, float>(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, +template SimpleTensor<uint8_t> conv3d<uint8_t, int32_t, int32_t>(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, const Conv3dInfo &conv3d_info); -template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, +template SimpleTensor<int8_t> conv3d<int8_t, int32_t, int32_t>(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h index e3674f4bfb..a440b15d55 100644 --- a/tests/validation/reference/Conv3D.h +++ b/tests/validation/reference/Conv3D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H -#define ARM_COMPUTE_TEST_CONV3D_LAYER_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H +#define ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H #include "Utils.h" #include "arm_compute/runtime/FunctionDescriptors.h" @@ -37,11 +37,11 @@ namespace validation { namespace reference { -template <typename T, typename TB> +template <typename T, typename TB, typename TACC> SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_CONV3D_H diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index 64a89aa6a0..67d69c2c38 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt) ARM_COMPUTE_UNUSED(dt); return static_cast<TOut>(dequantize_qsymm16(val, qinfo)); } +template <typename TOut> +TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt) +{ + ARM_COMPUTE_UNUSED(dt); + return static_cast<TOut>(dequantize_s32(val, qinfo)); +} } // namespace template <typename TOut, typename TIn> SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src) @@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src); template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src); template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src); +template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp index b76263bf95..ad7ba7ac43 100644 --- a/tests/validation/reference/QuantizationLayer.cpp +++ b/tests/validation/reference/QuantizationLayer.cpp @@ -80,15 +80,6 @@ SimpleTensor<Tout> quantization_layer(const SimpleTensor<Tin> &src, DataType out dst[i] = quantize_qasymm16((src[i]), qinfo, rounding_policy); } break; - case DataType::F32: -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(int i = 0; i < src.num_elements(); ++i) - { - dst[i] = dequantize_s32((src[i]), qinfo); - } - break; default: ARM_COMPUTE_ERROR("Unsupported output data type"); } @@ -136,7 +127,6 @@ template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info); template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<half> &src, DataType output_data_type, const QuantizationInfo &quantization_info); template SimpleTensor<uint16_t> quantization_layer(const SimpleTensor<float> &src, DataType output_data_type, const QuantizationInfo &quantization_info); -template SimpleTensor<float> quantization_layer(const SimpleTensor<int32_t> &src, DataType output_data_type, const QuantizationInfo &quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp index 7543b46bb1..55c48a9002 100644 --- a/tests/validation/reference/ScatterLayer.cpp +++ b/tests/validation/reference/ScatterLayer.cpp @@ -23,6 +23,7 @@ */ #include "ScatterLayer.h" #include "tests/validation/Helpers.h" +#include "arm_compute/core/TensorShape.h" namespace arm_compute { @@ -62,38 +63,70 @@ T reduce_op(const T ¤t,const T &update,const ScatterFunction func) } template float reduce_op(const float ¤t,const float &update,const ScatterFunction func); +template half reduce_op(const half ¤t,const half &update,const ScatterFunction func); } -// Note : This function currently only supports 1D src, 1D updates, 2D indices, 1D output tensors. +// NOTE: This function expects collapsed tensors as input. +// Batch dims for update/indices tensors should be collapsed into a single dim. +// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function. template <typename T> SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) { + // 1. If zero initialization variable is false, copy src data to dst. SimpleTensor<T> dst{ out_shape, src.data_type(), 1 }; - - // 1. If zero initialization variable is true, fill dst with 0 values. Else copy src data to dst. - if(info.zero_initialization) - { - for (int i = 0; i < src.num_elements(); ++i) - { - dst[i] = static_cast<T>(0); - } - } - else + if(!info.zero_initialization) { std::copy_n(src.data(), src.num_elements(), dst.data()); } - // 2. Get max index of output tensor, then iterate over index tensor. - const int x_bound = static_cast<int>(dst.shape().x()); + // Number of elements between each value of the dim being iterated through + const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1); + const unsigned int no_output_dims = out_shape.num_dimensions(); + // Calculate output stride at given index for all output dims. + std::vector<unsigned int> out_stride_at_idx(no_output_dims); + for (unsigned int i = 0 ; i < no_output_dims; i++) + { + out_stride_at_idx[i] = out_shape.total_size_lower(i); + } - for(int i = 0; i < indices.num_elements(); ++i) + const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]); + const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]); + + // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor. + for(unsigned int i = 0; i < indices_y_dim; i++) { - // 3. Check whether index is out of bounds for dst, if not then apply reduce op. - const auto index = indices[i]; - if (index < x_bound && index >= 0) // Note : we ignore negative index values. + // NOTE : Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim + // Starting index for both the update and indices tensors. + const unsigned int update_dim_start = i * data_stride; + const unsigned int indices_dim_start = i * indices_x_dim; + bool out_of_bounds = false; + unsigned int out_offset_acc = 0; + + // Iterate over each indices value for the relevant batch and accumulate the offset. + for(unsigned int j = 0; j < indices_x_dim; j++) + { + // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding k + const int index_value = indices[indices_dim_start + j]; + const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value. + if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0) + { + out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation + } + else + { + out_of_bounds = true; + break; + } + } + + // If not out of bounds, copy update tensor elements to output + if(!out_of_bounds) { - dst[index] = reduce_op(dst[index], updates[i], info.func); + for (unsigned int j = 0 ; j < data_stride; j++) + { + dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func); + } } } return dst; @@ -106,7 +139,13 @@ SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> } template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); - +template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp b/tests/validation/runtime/experimental/operators/CpuGemm.cpp new file mode 100644 index 0000000000..c6df429a4d --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuGemm.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemm.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/NEON/Accessor.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +/* + * Tests for arm_compute::experimental::ops::CpuGemm which is a shallow wrapper for + * arm_compute::cpu::CpuGemm. Any future testing to the functionalities of cpu::CpuGemm will + * be tested in tests/NEON/GEMM.cpp given that ops::CpuGemm remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +namespace +{ +/** CNN data types */ +const auto CNNDataTypes = make("DataType", +{ + DataType::F32, +}); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) + +TEST_SUITE(CPUGEMM) +/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<arm_compute::experimental::ops::CpuGemm>(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info); + + // telhs are newly created every call of this lambda function + auto lhs = create_tensor<Tensor>(lhs_info); + auto rhs = create_tensor<Tensor>(rhs_info); + auto c = create_tensor<Tensor>(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + arm_compute::experimental::ops::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // CPUGEMM +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute |