From f26ea2f8cc957a1e6faf0361dea805fb2e236061 Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Fri, 24 Mar 2023 11:42:03 +0000 Subject: Implement MatMul Function Resolves: COMPMID-5949 Signed-off-by: Ramy Elgammal Change-Id: Idd8cfe6ea94a14f0b23178f6781251b5f0955563 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9390 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Benchmark: Arm Jenkins --- Android.bp | 2 + arm_compute/runtime/CL/CLFunctions.h | 3 +- arm_compute/runtime/CL/functions/CLMatMul.h | 99 +++++++++++++++ filelist.json | 4 +- src/gpu/cl/kernels/ClNativeMatMulKernel.h | 4 +- src/gpu/cl/operators/ClMatMul.cpp | 80 +++++++++++++ src/gpu/cl/operators/ClMatMul.h | 84 +++++++++++++ src/runtime/CL/functions/CLMatMul.cpp | 69 +++++++++++ tests/validation/CL/MatMul.cpp | 94 +++++++++++++++ tests/validation/fixtures/MatMulFixture.h | 180 ++++++++++++++++++++++++++++ utils/TypePrinter.h | 29 +++++ 11 files changed, 643 insertions(+), 5 deletions(-) create mode 100644 arm_compute/runtime/CL/functions/CLMatMul.h create mode 100644 src/gpu/cl/operators/ClMatMul.cpp create mode 100644 src/gpu/cl/operators/ClMatMul.h create mode 100644 src/runtime/CL/functions/CLMatMul.cpp create mode 100644 tests/validation/CL/MatMul.cpp create mode 100644 tests/validation/fixtures/MatMulFixture.h diff --git a/Android.bp b/Android.bp index 5617812539..f315def2e6 100644 --- a/Android.bp +++ b/Android.bp @@ -741,6 +741,7 @@ cc_library_static { "src/gpu/cl/operators/ClGemmLowpOutputStage.cpp", "src/gpu/cl/operators/ClIndirectConv2d.cpp", "src/gpu/cl/operators/ClLogicalNot.cpp", + "src/gpu/cl/operators/ClMatMul.cpp", "src/gpu/cl/operators/ClMul.cpp", "src/gpu/cl/operators/ClPRelu.cpp", "src/gpu/cl/operators/ClPermute.cpp", @@ -823,6 +824,7 @@ cc_library_static { "src/runtime/CL/functions/CLLogicalAnd.cpp", "src/runtime/CL/functions/CLLogicalNot.cpp", "src/runtime/CL/functions/CLLogicalOr.cpp", + "src/runtime/CL/functions/CLMatMul.cpp", 
"src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp", "src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp", "src/runtime/CL/functions/CLNormalizationLayer.cpp", diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index e37134d454..26e459680c 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2022 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -77,6 +77,7 @@ #include "arm_compute/runtime/CL/functions/CLLogicalAnd.h" #include "arm_compute/runtime/CL/functions/CLLogicalNot.h" #include "arm_compute/runtime/CL/functions/CLLogicalOr.h" +#include "arm_compute/runtime/CL/functions/CLMatMul.h" #include "arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h" #include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLMatMul.h b/arm_compute/runtime/CL/functions/CLMatMul.h new file mode 100644 index 0000000000..56dd9c5655 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMatMul.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL +#define ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL + +#include "arm_compute/runtime/IFunction.h" +#include +namespace arm_compute +{ +// Forward declarations for used types instead of including their header, that could minimize compile time +class CLCompileContext; +class ICLTensor; +class ITensorInfo; +class MatMulInfo; +class Status; + +/** Basic function to execute MatMul (Matrix Multiplication) on OpenCL */ +class CLMatMul : public IFunction +{ +public: + /** Default constructor.*/ + CLMatMul(); + /** Default destructor */ + ~CLMatMul(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMatMul(const CLMatMul &) = delete; + /** Default move constructor */ + CLMatMul(CLMatMul &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMatMul &operator=(const CLMatMul &) = delete; + /** Default move assignment operator */ + CLMatMul &operator=(CLMatMul &&); + /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |lhs |rhs |output | + * |:------------|:------------|:--------------| + * |F32 |F32 |F32 | + * |F16 |F16 |F16 | + * + * @note BatchMatMul: Batched Matrix Multiply - [A * B], Multiplies all slices (slice is an element of a batch) of Tensors A and B + * and stores the result in the dst tensor of the same batch size. 
+ * Batch here is number of slices from A and B multiplied at a time, do not confuse with the batch dimension 'N' of NHWC/NCHW + * For NHWC for example: the batch is the higher dimensions H * N, and in general it is H * all higher dimensions. + * @note All tensors must have the same data type. + * + * @param[in] compile_context The compile context to be used. + * @param[in] lhs LHS input tensor (Matrix or Vector A). Data types supported: F16/F32 + * @param[in] rhs RHS input tensor (Matrix B). Data type supported: same as @p lhs. + * @param[out] output Output tensor. Data type supported: same as @p lhs. + * @param[in] matmul_info Attributes for MatMul + */ + void configure(const CLCompileContext &compile_context, ICLTensor *lhs, ICLTensor *rhs, ICLTensor *output, const MatMulInfo &matmul_info); + /** Initialise the kernel's inputs and output + * + * Similar to @ref CLMatMul::configure() + */ + void configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *output, const MatMulInfo &matmul_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLMatMul.
+ * + * Similar to @ref CLMatMul::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info); + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; +} // namespace arm_compute + +#endif /* ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL */ diff --git a/filelist.json b/filelist.json index 1e59adfc8e..c8e1ce0b9b 100644 --- a/filelist.json +++ b/filelist.json @@ -512,7 +512,9 @@ "MatMul": { "files": { "common": [ - "src/gpu/cl/kernels/ClNativeMatMulKernel.cpp" + "src/gpu/cl/kernels/ClNativeMatMulKernel.cpp", + "src/gpu/cl/operators/ClMatMul.cpp", + "src/runtime/CL/functions/CLMatMul.cpp" ] } }, diff --git a/src/gpu/cl/kernels/ClNativeMatMulKernel.h b/src/gpu/cl/kernels/ClNativeMatMulKernel.h index 021292a4ae..3d0f18ec84 100644 --- a/src/gpu/cl/kernels/ClNativeMatMulKernel.h +++ b/src/gpu/cl/kernels/ClNativeMatMulKernel.h @@ -24,8 +24,6 @@ #ifndef ACL_SRC_GPU_CL_KERNELS_CLNATIVEMATMULKERNEL #define ACL_SRC_GPU_CL_KERNELS_CLNATIVEMATMULKERNEL -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/KernelDescriptors.h" #include "src/core/common/Macros.h" #include "src/gpu/cl/ClCompileContext.h" @@ -65,7 +63,7 @@ public: void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: - bool _export_rhs_to_cl_image { false }; + bool _export_rhs_to_cl_image{ false }; }; } // namespace kernels } // namespace opencl diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp new file mode 100644 index 0000000000..dadaa1f779 --- /dev/null +++ b/src/gpu/cl/operators/ClMatMul.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/gpu/cl/operators/ClMatMul.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/common/utils/Log.h" +#include "src/gpu/cl/kernels/ClNativeMatMulKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +using namespace arm_compute::opencl::kernels; +ClMatMul::ClMatMul() + : _native_matmul_kernel(std::make_unique()) +{ +} +ClMatMul::~ClMatMul() +{ +} +Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info) +{ + MatMulKernelInfo kernel_info; + kernel_info.adj_lhs = matmul_info.adj_lhs(); + kernel_info.adj_rhs = matmul_info.adj_rhs(); + return ClNativeMatMulKernel::validate(lhs, rhs, output, kernel_info); +} +void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, output); + ARM_COMPUTE_LOG_PARAMS(lhs, rhs, output, matmul_info); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info)); + const GPUTarget gpu_target = CLScheduler::get().target(); + + // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe + + // Filling the MatMul Kernel info + MatMulKernelInfo kernel_info; + kernel_info.adj_lhs = matmul_info.adj_lhs(); + kernel_info.adj_rhs = matmul_info.adj_rhs(); + kernel_info.m0 = 1; // to be properly calculated from heuristics + kernel_info.n0 = 4; // to be properly calculated from heuristics + kernel_info.k0 = 4; // to be properly calculated from heuristics + kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics + + // Set the target for the kernels + _native_matmul_kernel->set_target(gpu_target); + + // Configure the native matrix multiply kernel + _native_matmul_kernel->configure(compile_context, lhs, rhs, output, kernel_info); +} +void 
ClMatMul::run(ITensorPack &tensors) +{ + CLScheduler::get().enqueue_op(*_native_matmul_kernel, tensors, true); +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/gpu/cl/operators/ClMatMul.h b/src/gpu/cl/operators/ClMatMul.h new file mode 100644 index 0000000000..894b8d5816 --- /dev/null +++ b/src/gpu/cl/operators/ClMatMul.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_SRC_GPU_CL_OPERATORS_ClMatMul +#define ARM_COMPUTE_SRC_GPU_CL_OPERATORS_ClMatMul + +#include "src/gpu/cl/IClOperator.h" +#include "src/gpu/cl/kernels/ClNativeMatMulKernel.h" +#include + +namespace arm_compute +{ +namespace opencl +{ +/** Basic operator to execute BatchMatMul on OpenCL. 
This operator calls the following OpenCL kernels: + * + * -# @ref kernels::ClNativeMatMulKernel + */ +class ClMatMul : public IClOperator +{ +public: + /** Constructor */ + ClMatMul(); + ~ClMatMul(); + /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |lhs |rhs |output | + * |:------------|:------------|:------------| + * |F32 |F32 |F32 | + * |F16 |F16 |F16 | + * + * @note BatchMatMul: Batched Matrix Multiply - [A * B], Multiplies all slices (slice is an element of a batch) of Tensors A and B + * and stores the result in the dst tensor of the same batch size. + * Batch here is number of slices from A and B multiplied at a time, do not confuse with the batch dimension 'N' of NHWC/NCHW + * For NHWC for example: the batch is the higher dimensions H * N, and in general it is H * all higher dimensions. + * @note All tensors must have the same data type. + * + * @param[in] compile_context The compile context to be used. + * @param[in] lhs LHS input tensor info (Matrix A). Data types supported: F16/F32 + * @param[in] rhs RHS input tensor info (Matrix B). Data types supported: same as @p lhs. + * @param[out] output Output tensor info. 
Data types supported: same as @p lhs + * @param[in] matmul_info Attributes for MatMul + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to @ref ClMatMul::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info); + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + +private: + std::unique_ptr _native_matmul_kernel; +}; +} // namespace opencl +} // namespace arm_compute +#endif // ARM_COMPUTE_SRC_GPU_CL_OPERATORS_ClMatMul diff --git a/src/runtime/CL/functions/CLMatMul.cpp b/src/runtime/CL/functions/CLMatMul.cpp new file mode 100644 index 0000000000..f42e4ff309 --- /dev/null +++ b/src/runtime/CL/functions/CLMatMul.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLMatMul.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTypes.h" +#include "src/gpu/cl/operators/ClMatMul.h" + +namespace arm_compute +{ +using OperatorType = opencl::ClMatMul; + +struct CLMatMul::Impl +{ + std::unique_ptr op{ nullptr }; + ITensorPack run_pack{}; +}; +CLMatMul::CLMatMul() + : _impl(std::make_unique()) +{ +} + +CLMatMul::~CLMatMul() = default; + +void CLMatMul::configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *output, const MatMulInfo &matmul_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), lhs, rhs, output, matmul_info); +} + +void CLMatMul::configure(const CLCompileContext &compile_context, ICLTensor *lhs, ICLTensor *rhs, ICLTensor *output, const MatMulInfo &matmul_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, output); + + _impl->op = std::make_unique(); + _impl->op->configure(compile_context, lhs->info(), rhs->info(), output->info(), matmul_info); + _impl->run_pack = { { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs }, { ACL_DST, output } }; +} + +Status CLMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info) +{ + return OperatorType::validate(lhs, rhs, output, matmul_info); +} + +void CLMatMul::run() +{ + _impl->op->run(_impl->run_pack); +} + +} // namespace arm_compute diff --git a/tests/validation/CL/MatMul.cpp b/tests/validation/CL/MatMul.cpp new file mode 100644 index 0000000000..bd259f785e --- /dev/null +++ b/tests/validation/CL/MatMul.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLMatMul.h" +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data type in case using relative tolerance fails because of small values */ +RelativeTolerance tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +} // namespace +template +using MatMulFixture = MatMulValidationFixture; + +TEST_SUITE(CL) +TEST_SUITE(MatMul) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, MatMulFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output (relative tolerance, with the absolute tolerance as fallback for near-zero values) + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, MatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true
})), + framework::dataset::make("pretransose_B", { false, true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output (relative tolerance, with the absolute tolerance as fallback for near-zero values) + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +TEST_SUITE_END() // FP32 +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, MatMulFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output (relative tolerance, with the absolute tolerance as fallback for near-zero values) + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, MatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("pretransose_A", { false, true })), + framework::dataset::make("pretransose_B", { false, true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output (relative tolerance, with the absolute tolerance as fallback for near-zero values) + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // MatMul +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h new file mode 100644 index 0000000000..1112dcb2fb --- /dev/null +++ b/tests/validation/fixtures/MatMulFixture.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2023 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_VALIDATION_FIXTURES_MATMULFIXTURE +#define TESTS_VALIDATION_FIXTURES_MATMULFIXTURE + +#include "arm_compute/core/Types.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class MatMulValidationFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type) + { + // For brevity, the input shapes are assumed to be not-transposed for both Lhs and Rhs matrices. 
+ if(pretranspose_a) + { + permute(shape_a, PermutationVector(1U, 0U)); + } + if(pretranspose_b) + { + permute(shape_b, PermutationVector(1U, 0U)); + } + _target = compute_target(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type); + _reference = compute_reference(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type); + } + +protected: + template + void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit distribution{ float(lo), float(hi) }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution distribution(lo, hi); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type) + { + // 1. Create Classes and configure function + // Create tensors + TensorType a = create_tensor(shape_a, data_type, 1); + TensorType b = create_tensor(shape_b, data_type, 1); + TensorType dst = create_tensor(output_shape, data_type, 1); + FunctionType matmul; + // Configure MatMulInfo class + MatMulInfo info; + info.adj_lhs(pretranspose_a); + info.adj_rhs(pretranspose_b); + matmul.configure(&a, &b, &dst, info); + // Assertions + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // 2. 
Fill tensors and run once + // Fill tensors + fill(AccessorType(a), 0); + fill(AccessorType(b), 1); + matmul.run(); // First run + + return dst; + } + SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type) + { + // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D + // This is necessary unless we choose to extend gemm reference for 5D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimW); + TensorShape a_shape_collapsed = shape_a.collapsed_from(Window::DimW); + TensorShape b_shape_collapsed = shape_b.collapsed_from(Window::DimW); + + // Create reference + SimpleTensor a{ a_shape_collapsed, data_type, 1 }; + SimpleTensor b{ b_shape_collapsed, data_type, 1 }; + SimpleTensor c{ output_shape_collapsed, data_type, 1 }; + + // Fill reference + fill(a, 0); + fill(b, 1); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_a is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_b is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. 
*/ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor a_transposed{ a_transposed_shape, data_type }; + SimpleTensor b_transposed{ b_transposed_shape, data_type }; + + // pretranspose a if necessary + if(pretranspose_a) + { + a_transposed = reference::permute(a, PermutationVector(1U, 0U)); + } + + // pretranspose b if necessary + if(pretranspose_b) + { + b_transposed = reference::permute(b, PermutationVector(1U, 0U)); + } + + // Setting beta to 0 will effectively disable C for the + // computation of the reference: alpha * A * B + 0 * C + // Use transposed tensors if boolean enabled else use original tensors + SimpleTensor result = reference::gemm((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f); + + // We reshape the gemm output back if the tensor is high dimensional + if(output_shape_collapsed != output_shape) + { + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + TensorType _target{}; + SimpleTensor _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* TESTS_VALIDATION_FIXTURES_MATMULFIXTURE */ diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index c3af0a2419..9b9c7b5b34 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -3678,6 +3678,35 @@ inline std::string to_string(const experimental::dynamic_fusion::SoftmaxAttribut str << softmax_attr; return str.str(); } +/** Formatted output of the arm_compute::MatMulInfo type. + * + * @param[out] os Output stream. + * @param[in] matmul_info arm_compute::MatMulInfo type to output. + * + * @return Modified output stream. 
+ */ +inline ::std::ostream &operator<<(::std::ostream &os, const arm_compute::MatMulInfo &matmul_info) +{ + os << "MatMulInfo=" + << "[" + << "adj_lhs=" << matmul_info.adj_lhs() << ", " + << "adj_rhs=" << matmul_info.adj_rhs() << ", " + << "fused_activation=" << matmul_info.fused_activation().activation() << "]"; + + return os; +} +/** Formatted output of the arm_compute::MatMulInfo type. + * + * @param[in] matmul_info arm_compute::MatMulInfo type to output. + * + * @return Formatted string. + */ +inline std::string to_string(const arm_compute::MatMulInfo &matmul_info) +{ + std::stringstream str; + str << matmul_info; + return str.str(); +} /** Formatted output of the arm_compute::MatMulKernelInfo type. * -- cgit v1.2.1