From 5a4fe19c23729f1e58e947ed15e865dc33c35ff6 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 16 Mar 2020 12:22:37 +0000 Subject: COMPMID-3112: Reworking heuristic for CLGEMM - part1 The new heuristic only affects the floating point execution Change-Id: Ia6edc14ab1bdda4cee31b7afb096d0305d99b809 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2942 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- Android.bp | 3 + SConscript | 1 + arm_compute/runtime/CL/CLTypes.h | 58 ++++++++ arm_compute/runtime/CL/ICLGEMMKernelSelection.h | 66 +++++++++ arm_compute/runtime/CL/functions/CLGEMM.h | 32 ++--- .../runtime/CL/gemm/CLGEMMKernelSelection.h | 65 +++++++++ .../runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h | 53 +++++++ .../runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h | 53 +++++++ .../runtime/CL/gemm/CLGEMMKernelSelectionValhall.h | 53 +++++++ ...MMReshapedOnlyRHSKernelConfigurationValhall.cpp | 31 ++++- src/runtime/CL/functions/CLGEMM.cpp | 130 ++++++------------ .../CL/gemm/CLGEMMKernelSelectionBifrost.cpp | 152 +++++++++++++++++++++ .../CL/gemm/CLGEMMKernelSelectionMidgard.cpp | 95 +++++++++++++ .../CL/gemm/CLGEMMKernelSelectionValhall.cpp | 105 ++++++++++++++ 14 files changed, 778 insertions(+), 119 deletions(-) create mode 100644 arm_compute/runtime/CL/CLTypes.h create mode 100644 arm_compute/runtime/CL/ICLGEMMKernelSelection.h create mode 100644 arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h create mode 100644 arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h create mode 100644 arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h create mode 100644 arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp diff --git a/Android.bp 
b/Android.bp index f31dabbe07..7c63e6f002 100644 --- a/Android.bp +++ b/Android.bp @@ -564,6 +564,9 @@ cc_library_static { "src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp", "src/runtime/CL/functions/CLWinogradInputTransform.cpp", "src/runtime/CL/functions/CLYOLOLayer.cpp", + "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp", + "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp", + "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp", "src/runtime/CL/tuners/BifrostTuner.cpp", "src/runtime/CL/tuners/CLLWSList.cpp", "src/runtime/CL/tuners/MidgardTuner.cpp", diff --git a/SConscript b/SConscript index b31f014db6..9c62f37abc 100644 --- a/SConscript +++ b/SConscript @@ -211,6 +211,7 @@ if env['opencl']: runtime_files += Glob('src/runtime/CL/*.cpp') runtime_files += Glob('src/runtime/CL/functions/*.cpp') + runtime_files += Glob('src/runtime/CL/gemm/*.cpp') runtime_files += Glob('src/runtime/CL/tuners/*.cpp') graph_files += Glob('src/graph/backends/CL/*.cpp') diff --git a/arm_compute/runtime/CL/CLTypes.h b/arm_compute/runtime/CL/CLTypes.h new file mode 100644 index 0000000000..f7b4ebd9b4 --- /dev/null +++ b/arm_compute/runtime/CL/CLTypes.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_RUNTIME_CLTYPES_H +#define ARM_COMPUTE_RUNTIME_CLTYPES_H + +namespace arm_compute +{ +/** OpenCL GEMM kernel types */ +enum class CLGEMMKernelType +{ + /** Native GEMM kernel with fixed block size. + * @note Temporary variant to keep compatibility with the old implementation. + * @note This variant will be deprecated in favor of a new and configurable NATIVE variant + */ + NATIVE_V1, + /** Reshaped GEMM kernel where both lhs and rhs matrices are reshaped. Fixed block size. + * @note Temporary variant to keep compatibility with the old implementation. + * @note This variant will be deprecated in favor of RESHAPED + */ + RESHAPED_V1, + /** Reshaped GEMM kernel where both lhs and rhs matrices are reshaped. Configurable reshape and block size */ + RESHAPED, + /** Reshaped GEMM kernel where only the rhs matrix is reshaped. Configurable reshape and block size */ + RESHAPED_ONLY_RHS +}; + +/** OpenCL GEMM kernel selection parameters. This information is used to select the GEMM kernel on OpenCL */ +struct CLGEMMKernelSelectionParams +{ + unsigned int m{ 0 }; /**< Number of rows for the lhs matrix. Lhs matrix NOT transposed */ + unsigned int n{ 0 }; /**< Number of columns for the rhs matrix. Rhs matrix NOT transposed */ + unsigned int k{ 0 }; /**< Number of rows for the rhs matrix. 
Rhs matrix NOT transposed */ + bool is_rhs_constant{ false }; /**< True if the content of the rhs matrix is constant */ + DataType data_type{DataType::UNKNOWN}; /**< Data type */ +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_CLTYPES_H */ diff --git a/arm_compute/runtime/CL/ICLGEMMKernelSelection.h b/arm_compute/runtime/CL/ICLGEMMKernelSelection.h new file mode 100644 index 0000000000..69b941109d --- /dev/null +++ b/arm_compute/runtime/CL/ICLGEMMKernelSelection.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ICLGEMMKERNELSELECTION_H +#define ARM_COMPUTE_ICLGEMMKERNELSELECTION_H + +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTypes.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Basic interface for the GEMM kernel selection */ +class ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] arch GPU target + */ + ICLGEMMKernelSelection(GPUTarget arch) + : _target(arch) + { + } + /** Default Move Constructor. */ + ICLGEMMKernelSelection(ICLGEMMKernelSelection &&) = default; + /** Default move assignment operator */ + ICLGEMMKernelSelection &operator=(ICLGEMMKernelSelection &&) = default; + /** Virtual destructor */ + virtual ~ICLGEMMKernelSelection() = default; + /** Given the input parameters passed through @ref CLGEMMKernelSelectionParams, this method returns the @ref CLGEMMKernelType to use + * + * @param[in] params Input parameters used by the function to return the OpenCL GEMM's kernel + * + * @return @ref CLGEMMKernelType + */ + virtual CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) = 0; + +protected: + GPUTarget _target; /**< GPU target could be used to call a dedicated heuristic for each GPU IP for a given GPU architecture */ +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_ICLGEMMKERNELSELECTION_H */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index bb620eb6f7..7a4f12043e 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTypes.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" @@ -91,10 +92,10 @@ private: /** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels: * * -# @ref CLGEMMReshapeLHSMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model) - * -# @ref CLGEMMReshapeRHSMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_type method()) - * -# @ref CLGEMMMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_type method()) - * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_type method()) - * -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_type method()) + * -# @ref CLGEMMReshapeRHSMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) + * -# @ref CLGEMMMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method()) + * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method()) + * -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) * */ class CLGEMM : public IFunction @@ -153,25 +154,16 @@ public: void prepare() override; private: - enum class GEMMType - { - NATIVE, - RESHAPED_V1, - RESHAPED_V2, - RESHAPED_ONLY_RHS - }; - - // TODO (COMPMID-2095) - static GEMMType select_gemm_type(unsigned int m, unsigned int n, unsigned int k, DataType data_type, 
bool reshape_b_only_on_first_run, GPUTarget gpu_target); + static CLGEMMKernelType select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run); - void configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); + void configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); + static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); + static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const 
ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); MemoryGroup _memory_group; @@ -187,7 +179,7 @@ private: const ICLTensor *_original_b; bool _reshape_b_only_on_first_run; bool _is_prepared; - GEMMType _gemm_type; + CLGEMMKernelType _gemm_kernel_type; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h new file mode 100644 index 0000000000..47d787445e --- /dev/null +++ b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMKERNELSELECTION_H +#define ARM_COMPUTE_CLGEMMKERNELSELECTION_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" + +#include <memory> + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMKernelSelection factory class */ +class CLGEMMKernelSelectionFactory final +{ +public: + /** Static method to select the GEMM kernel according to the GPU target and GEMM's dimensionality + * + * @param[in] gpu GPU target + * + * @return CLGEMMKernelSelection class + */ + static std::unique_ptr<ICLGEMMKernelSelection> create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + return support::cpp14::make_unique<CLGEMMKernelSelectionMidgard>(gpu); + case GPUTarget::BIFROST: + return support::cpp14::make_unique<CLGEMMKernelSelectionBifrost>(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique<CLGEMMKernelSelectionValhall>(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMKERNELSELECTION_H */ diff --git a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h new file mode 100644 index 0000000000..94311fb3aa --- /dev/null +++ b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMKERNELSELECTIONBIFROST_H +#define ARM_COMPUTE_CLGEMMKERNELSELECTIONBIFROST_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionBifrost final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionBifrost(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMKERNELSELECTIONBIFROST_H */ diff --git a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h new file mode 100644 index 0000000000..c0bc4fc507 --- /dev/null +++ b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLGEMMKERNELSELECTIONMIDGARD_H +#define ARM_COMPUTE_CLGEMMKERNELSELECTIONMIDGARD_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Midgard based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionMidgard final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionMidgard(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMKERNELSELECTIONMIDGARD_H */ diff --git a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h new file mode 100644 index 0000000000..d9006e78f1 --- /dev/null +++ b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CLGEMMKERNELSELECTIONVALHALL_H +#define ARM_COMPUTE_CLGEMMKERNELSELECTIONVALHALL_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionValhall final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionValhall(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLGEMMKERNELSELECTIONVALHALL_H */ diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp index b44b297722..951447e1a0 100644 --- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp @@ -75,17 +75,26 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi if(m == 1) { - return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 8, false, true, false, true); + if(n > 2048) + { + return configure_lhs_rhs_info(m, n, 1, 8, 2, 1, 256, false, true, false, true); + } + else + { + return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 256, false, true, false, true); + } } else { - if(m % 5) + if(m > 300) { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 8, false, true, false, true); + const int v0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); + return 
configure_lhs_rhs_info(m, n, 4, 4, 4, 1, v0, false, true, false, true); } else { - return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 8, false, true, false, true); + const int v0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, v0, false, true, false, true); } } } @@ -97,12 +106,20 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi if(m == 1) { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true); + const int h0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); + return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true); } else { - return configure_lhs_rhs_info(m, n, 4, 8, 8, 1, 4, false, true, false, true); + const int h0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); + if(k > 512) + { + return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, false, true, false, false); + } + else + { + return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, false); + } } } diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index 09b6397056..74d59cdad1 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -39,6 +39,7 @@ #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h" #include "arm_compute/runtime/ITensorAllocator.h" namespace arm_compute @@ -61,79 +62,26 @@ CLGEMM::CLGEMM(std::shared_ptr memory_manager, IWeightsManager * _original_b(nullptr), _reshape_b_only_on_first_run(false), _is_prepared(false), - _gemm_type(GEMMType::NATIVE) + _gemm_kernel_type(CLGEMMKernelType::NATIVE_V1) { } -CLGEMM::GEMMType CLGEMM::select_gemm_type(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool 
reshape_b_only_on_first_run, GPUTarget gpu_target) +CLGEMMKernelType CLGEMM::select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run) { - GEMMType gemm_type = GEMMType::RESHAPED_V1; + std::unique_ptr gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target()); + ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_kernel.get()); - if(gpu_target_is_in(gpu_target, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, - GPUTarget::G52, GPUTarget::G52LIT, GPUTarget::G71, GPUTarget::G72, - GPUTarget::G76, GPUTarget::G77)) - { - if(data_type == DataType::F32) - { - if((m > 1) && (n < 16)) - { - gemm_type = GEMMType::RESHAPED_V2; - } - else if(m == 1) - { - gemm_type = GEMMType::RESHAPED_ONLY_RHS; - } - else - { - // COMPMID-852 - if((k > 256) && (m > 4) && reshape_b_only_on_first_run) - { - constexpr float alpha = 3.2f; - constexpr float fact0 = 1.51f; - constexpr float fact1 = 1.66f; - constexpr float ops = 12.0f; - const float scale = k > 1024 ? 1.07f : 1.0f; - gemm_type = (alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops)) ? GEMMType::RESHAPED_V2 : GEMMType::RESHAPED_V2; - } - else - { - gemm_type = GEMMType::RESHAPED_ONLY_RHS; - } - } + CLGEMMKernelSelectionParams params; + params.m = m; + params.n = n; + params.k = k; + params.is_rhs_constant = reshape_b_only_on_first_run; + params.data_type = data_type; - const auto workload = static_cast((m * n) / 20.0f); - - gemm_type = ((workload > 1600.0f) && (gemm_type == GEMMType::RESHAPED_V1) && (data_type == DataType::F32)) ? 
GEMMType::RESHAPED_V2 : gemm_type; - } - else - { - if((m == 1) || (!reshape_b_only_on_first_run)) - { - if((n > k) && gpu_target_is_in(gpu_target, GPUTarget::G71)) - { - gemm_type = GEMMType::NATIVE; - } - else - { - gemm_type = GEMMType::RESHAPED_ONLY_RHS; - } - } - else - { - gemm_type = GEMMType::RESHAPED_V2; - } - } - } - else - { - // We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once - gemm_type = ((m != 1) && reshape_b_only_on_first_run) ? GEMMType::RESHAPED_V1 : GEMMType::NATIVE; - } - - return gemm_type; + return gemm_kernel->select_kernel(params); } -void CLGEMM::configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info) +void CLGEMM::configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info) { const unsigned int m = gemm_info.reinterpret_input_as_3d() ? 
(a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1); const unsigned int n = b->info()->dimension(0); @@ -228,7 +176,7 @@ void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const } } -void CLGEMM::configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info) +void CLGEMM::configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info) { DataType data_type = a->info()->data_type(); bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); @@ -362,7 +310,7 @@ void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, } } -Status CLGEMM::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) +Status CLGEMM::validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_UNUSED(output); @@ -438,7 +386,7 @@ Status CLGEMM::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, return Status{}; } -Status CLGEMM::validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) +Status CLGEMM::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_UNUSED(output); @@ -547,37 +495,36 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor * _original_b = b; // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); bool reinterpret_input_as_3d = 
gemm_info.reinterpret_input_as_3d(); const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1); const unsigned int n = b->info()->dimension(0); const unsigned int k = a->info()->dimension(0); // Select GEMMType - _gemm_type = select_gemm_type(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run, gpu_target); + _gemm_kernel_type = select_gemm_kernel(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run); const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); const ICLTensor *c_to_use = fuse_add_c ? c : nullptr; - switch(_gemm_type) + switch(_gemm_kernel_type) { - case GEMMType::NATIVE: + case CLGEMMKernelType::NATIVE_V1: { - configure_native(a, b, c_to_use, output, alpha, beta, gemm_info); + configure_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info); break; } - case GEMMType::RESHAPED_V1: + case CLGEMMKernelType::RESHAPED_V1: { configure_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info); break; } - case GEMMType::RESHAPED_V2: + case CLGEMMKernelType::RESHAPED: { - configure_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info); + configure_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info); break; } - case GEMMType::RESHAPED_ONLY_RHS: + case CLGEMMKernelType::RESHAPED_ONLY_RHS: { configure_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info); break; @@ -592,37 +539,36 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor * Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) { // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); const unsigned int m = reinterpret_input_as_3d ? 
(a->dimension(1) * a->dimension(2)) : a->dimension(1); const unsigned int n = b->dimension(0); const unsigned int k = a->dimension(0); // Select GEMMType - GEMMType gemm_type = select_gemm_type(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run(), gpu_target); + CLGEMMKernelType gemm_kernel_type = select_gemm_kernel(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run()); const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr; - switch(gemm_type) + switch(gemm_kernel_type) { - case GEMMType::NATIVE: + case CLGEMMKernelType::NATIVE_V1: { - ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); break; } - case GEMMType::RESHAPED_V1: + case CLGEMMKernelType::RESHAPED_V1: { ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); break; } - case GEMMType::RESHAPED_V2: + case CLGEMMKernelType::RESHAPED: { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info)); break; } - case GEMMType::RESHAPED_ONLY_RHS: + case CLGEMMKernelType::RESHAPED_ONLY_RHS: { ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info)); break; @@ -643,14 +589,14 @@ void CLGEMM::run() MemoryGroupResourceScope scope_mg(_memory_group); // Run matrix multiply kernel - switch(_gemm_type) + switch(_gemm_kernel_type) { - case GEMMType::NATIVE: + case CLGEMMKernelType::NATIVE_V1: { CLScheduler::get().enqueue(_mm_kernel, true); break; } - case GEMMType::RESHAPED_V1: + case CLGEMMKernelType::RESHAPED_V1: { // Run interleave kernel CLScheduler::get().enqueue(_reshape_lhs_kernel, false); @@ -671,7 +617,7 @@ void CLGEMM::run() 
CLScheduler::get().enqueue(_mm_kernel, true); break; } - case GEMMType::RESHAPED_V2: + case CLGEMMKernelType::RESHAPED: { // Run interleave kernel CLScheduler::get().enqueue(_reshape_lhs_kernel, false); @@ -692,7 +638,7 @@ void CLGEMM::run() CLScheduler::get().enqueue(_mm_reshaped_kernel, true); break; } - case GEMMType::RESHAPED_ONLY_RHS: + case CLGEMMKernelType::RESHAPED_ONLY_RHS: { if(!_reshape_b_only_on_first_run) { @@ -721,7 +667,7 @@ void CLGEMM::prepare() { if(!_is_prepared) { - if(_gemm_type != GEMMType::NATIVE && _reshape_b_only_on_first_run) + if(_gemm_kernel_type != CLGEMMKernelType::NATIVE_V1 && _reshape_b_only_on_first_run) { if(_weights_manager && _weights_manager->are_weights_managed(_original_b)) { diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp new file mode 100644 index 0000000000..4542f53136 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" + +#include <map> +#include <utility> + +namespace arm_compute +{ +namespace cl_gemm +{ +CLGEMMKernelSelectionBifrost::CLGEMMKernelSelectionBifrost(GPUTarget gpu) + : ICLGEMMKernelSelection(gpu) +{ +} + +CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelSelectionParams &params) +{ + // _target could be used in the future to have a dedicated heuristic for each GPU IP + ARM_COMPUTE_UNUSED(_target); + + using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionBifrost::*)(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + + // Configurations for Bifrost architectures + static std::map<DataType, FunctionExecutorPtr> gemm_configs = + { + { DataType::F32, &CLGEMMKernelSelectionBifrost::default_f32 }, + { DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 }, + { DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 }, + { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 }, + { DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 }, + { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 } + }; + + const DataType data_type = params.data_type; + + if(gemm_configs.find(data_type) != gemm_configs.end()) + { + return (this->*gemm_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant); + } + + ARM_COMPUTE_ERROR("Not supported data type"); +} + +CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + CLGEMMKernelType
gemm_type = CLGEMMKernelType::NATIVE_V1; + + if(is_rhs_constant) + { + if((m > 1) && (n < 16)) + { + gemm_type = CLGEMMKernelType::RESHAPED; + } + else if(m == 1) + { + gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS; + } + else + { + if((k > 256) && (m > 4)) + { + gemm_type = CLGEMMKernelType::RESHAPED; + } + else + { + gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS; + } + } + } + + return gemm_type; +} + +CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + if(is_rhs_constant) + { + if(m == 1) + { + if((n > k) && gpu_target_is_in(_target, GPUTarget::G71)) + { + return CLGEMMKernelType::NATIVE_V1; + } + else + { + return CLGEMMKernelType::RESHAPED_ONLY_RHS; + } + } + else + { + return CLGEMMKernelType::RESHAPED; + } + } + else + { + return CLGEMMKernelType::NATIVE_V1; + } +} + +CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + if(is_rhs_constant) + { + if(m == 1) + { + if((n > k) && gpu_target_is_in(_target, GPUTarget::G71)) + { + return CLGEMMKernelType::NATIVE_V1; + } + else + { + return CLGEMMKernelType::RESHAPED_ONLY_RHS; + } + } + else + { + return CLGEMMKernelType::RESHAPED; + } + } + else + { + return CLGEMMKernelType::NATIVE_V1; + } +} +} // namespace cl_gemm +} // namespace arm_compute diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp new file mode 100644 index 0000000000..b7bb720175 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2020 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "arm_compute/core/GPUTarget.h" + +#include <map> +#include <utility> + +namespace arm_compute +{ +namespace cl_gemm +{ +CLGEMMKernelSelectionMidgard::CLGEMMKernelSelectionMidgard(GPUTarget gpu) + : ICLGEMMKernelSelection(gpu) +{ +} + +CLGEMMKernelType CLGEMMKernelSelectionMidgard::select_kernel(const CLGEMMKernelSelectionParams &params) +{ + // _target could be used in the future to have a dedicated heuristic for each GPU IP + ARM_COMPUTE_UNUSED(_target); + + using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionMidgard::*)(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + + // Configurations for Midgard architectures + static std::map<DataType, FunctionExecutorPtr> gemm_configs = + { + { DataType::F32, &CLGEMMKernelSelectionMidgard::default_f32 }, + { DataType::F16, &CLGEMMKernelSelectionMidgard::default_f16 }, + { DataType::QASYMM8, &CLGEMMKernelSelectionMidgard::default_q8 }, + { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionMidgard::default_q8 }, + { DataType::QSYMM8, &CLGEMMKernelSelectionMidgard::default_q8 }, + { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionMidgard::default_q8 } + }; + + const DataType data_type = params.data_type; + + if(gemm_configs.find(data_type) != gemm_configs.end()) + { + return (this->*gemm_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant); + } + + ARM_COMPUTE_ERROR("Not supported data type"); +} + +CLGEMMKernelType CLGEMMKernelSelectionMidgard::default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(n, k); + + // We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once + return ((m != 1) && is_rhs_constant) ?
CLGEMMKernelType::RESHAPED_V1 : CLGEMMKernelType::NATIVE_V1; +} + +CLGEMMKernelType CLGEMMKernelSelectionMidgard::default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(n, k); + + // We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once + return ((m != 1) && is_rhs_constant) ? CLGEMMKernelType::RESHAPED_V1 : CLGEMMKernelType::NATIVE_V1; +} + +CLGEMMKernelType CLGEMMKernelSelectionMidgard::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(n, k); + + // We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once + return ((m != 1) && is_rhs_constant) ? CLGEMMKernelType::RESHAPED_V1 : CLGEMMKernelType::NATIVE_V1; +} +} // namespace cl_gemm +} // namespace arm_compute diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp new file mode 100644 index 0000000000..8016417eb9 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" + +#include <map> +#include <utility> + +namespace arm_compute +{ +namespace cl_gemm +{ +CLGEMMKernelSelectionValhall::CLGEMMKernelSelectionValhall(GPUTarget gpu) + : ICLGEMMKernelSelection(gpu) +{ +} + +CLGEMMKernelType CLGEMMKernelSelectionValhall::select_kernel(const CLGEMMKernelSelectionParams &params) +{ + // _target could be used in the future to have a dedicated heuristic for each GPU IP + ARM_COMPUTE_UNUSED(_target); + + using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionValhall::*)(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant); + + // Configurations for Valhall architectures + static std::map<DataType, FunctionExecutorPtr> gemm_configs = + { + { DataType::F32, &CLGEMMKernelSelectionValhall::default_f32 }, + { DataType::F16, &CLGEMMKernelSelectionValhall::default_f16 }, + { DataType::QASYMM8, &CLGEMMKernelSelectionValhall::default_q8 }, + { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionValhall::default_q8 }, + { DataType::QSYMM8, &CLGEMMKernelSelectionValhall::default_q8 }, + { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionValhall::default_q8 } + }; + + const DataType data_type = params.data_type; + + if(gemm_configs.find(data_type) != gemm_configs.end()) + { + return (this->*gemm_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant); +
} + + ARM_COMPUTE_ERROR("Not supported data type"); +} + +CLGEMMKernelType CLGEMMKernelSelectionValhall::default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(m, n, k); + + return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE_V1; +} + +CLGEMMKernelType CLGEMMKernelSelectionValhall::default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(m, n, k); + + return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE_V1; +} + +CLGEMMKernelType CLGEMMKernelSelectionValhall::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant) +{ + ARM_COMPUTE_UNUSED(n, k); + + if(is_rhs_constant) + { + if(m == 1) + { + return CLGEMMKernelType::RESHAPED_ONLY_RHS; + } + else + { + return CLGEMMKernelType::RESHAPED; + } + } + else + { + return CLGEMMKernelType::NATIVE_V1; + } +} +} // namespace cl_gemm +} // namespace arm_compute -- cgit v1.2.1