diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-14 19:03:09 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-23 12:08:12 +0000 |
commit | 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch) | |
tree | f857d733ccf446c704823dc7ac796a96eb55095e /src/core/NEON/kernels/assembly | |
parent | 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff) | |
download | ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz |
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/assembly')
3 files changed, 5 insertions, 368 deletions
diff --git a/src/core/NEON/kernels/assembly/Helpers.cpp b/src/core/NEON/kernels/assembly/Helpers.cpp index 3d8d66d7fc..93ea6c8d5e 100644 --- a/src/core/NEON/kernels/assembly/Helpers.cpp +++ b/src/core/NEON/kernels/assembly/Helpers.cpp @@ -24,16 +24,13 @@ #include "arm_compute/core/NEON/kernels/assembly/Helpers.h" -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" - namespace arm_compute { arm_gemm::KernelDescription get_gemm_info(DataType input_type, const CPUInfo &ci, const unsigned int num_threads, const INEGEMMWrapperKernel::Params &p, - float alpha, - float beta, + arm_gemm::Activation activation, bool pretranspose_hint) { switch(input_type) @@ -42,25 +39,25 @@ arm_gemm::KernelDescription get_gemm_info(DataType in case DataType::QASYMM8: case DataType::U8: { - arm_gemm::GemmArgs<uint32_t> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint); + arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint); return arm_gemm::get_gemm_method<uint8_t, uint32_t>(args); } case DataType::S8: { - arm_gemm::GemmArgs<int32_t> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint); + arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint); return arm_gemm::get_gemm_method<int8_t, int32_t>(args); } #endif // __aarch64__ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { - arm_gemm::GemmArgs<__fp16> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint); + arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint); return arm_gemm::get_gemm_method<__fp16, __fp16>(args); } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: { - arm_gemm::GemmArgs<float> args(&ci, p.M, p.N, p.K, p.batches, 
p.multis, false, false, alpha, beta, num_threads, pretranspose_hint); + arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint); return arm_gemm::get_gemm_method<float, float>(args); } default: diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h deleted file mode 100644 index 6e30148b5d..0000000000 --- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__ -#define __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__ - -#include "../arm_gemm/utils.hpp" -#include "arm_gemm.hpp" - -#include "../arm_gemm/mergeresults.hpp" -#include "../arm_gemm/transform.hpp" - -#include "../arm_gemm/kernels/a32_sgemm_8x6.hpp" -#include "../arm_gemm/kernels/a64_gemm_s8_12x8.hpp" -#include "../arm_gemm/kernels/a64_gemm_s8_4x4.hpp" -#include "../arm_gemm/kernels/a64_gemm_u8_12x8.hpp" -#include "../arm_gemm/kernels/a64_gemm_u8_4x4.hpp" -#include "../arm_gemm/kernels/a64_hgemm_24x8.hpp" -#include "../arm_gemm/kernels/a64_sgemm_12x8.hpp" -#include "../arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp" -#include "../arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp" -#include "../arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp" -#include "../arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp" - -namespace arm_compute -{ -namespace detail -{ -/** GEMM Interleaved Strategy interface */ -class IInterleavedStrategy -{ -public: - /** Virtual Destructor */ - virtual ~IInterleavedStrategy() = default; - /** Return output height of the interleaved strategy - * - * @return Output height of strategy - */ - virtual unsigned int out_height() const = 0; - /** Instantiate and configure a prepareB Kernel - * - * @param[in] b Input tensor B. - * @param[in] transformed_b Reshaped tensor B. - * @param[in] params GM, N, K sizes. - * @param[in] ci CPUInfo to be used for kernel configuration. - * - * @return A wrapped specialized prepareB kernel - */ - virtual std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b, - ITensor *transformed_b, - const INEGEMMWrapperKernel::Params &params, - const CPUInfo &ci) = 0; - /** Instantiate and configure a transformA Kernel - * - * @param[in] a Input tensor A. - * @param[in] transformed_a Reshaped tensor A. - * @param[in] block_walker Window representing the layout of the matrix's blocks. - * @param[in] params M, N, K sizes. 
- * @param[in] gemm_info GEMM meta-data - * - * @return A wrapped specialized transformA kernel - */ - virtual std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a, - ITensor *transformed_a, - const Window &block_walker, - const INEGEMMWrapperKernel::Params &params, - const GEMMInfo &gemm_info) = 0; - /** Instantiate and configure a prepareB Kernel - * - * @param[in] transformed_a Already reshaped tensor A. - * @param[in] transformed_b Already reshaped tensor B. - * @param[in] tmp_c Temporary buffer to be used to store intermediate results. - * @param[in] c Result tensor C. - * @param[in] block_walker Window containing iteration information for the M and batch dimensions. - * @param[in] block_sizes Block sizes to use for the matrix multiplication (A & B must have been reshaped using these same block sizes). - * @param[in] params M, N, K sizes. - * @param[in] alpha Alpha value - * @param[in] beta Beta value - * @param[in] gemm_info GEMM meta-data - * @param[in] num_threads Maximum number of threads that might be used for the calculations. - * - * @return A wrapped specialized MatrixMultiply kernel - */ - virtual std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c, - const Window &block_walker, const BlockSizes &block_sizes, - const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info, - unsigned int num_threads) = 0; - /** Calculates the block sizes of a given strategy - * - * @param[in] ci CPUInfo to be used for kernel configuration. - * @param[in] params M, N, K sizes. 
- * - * @return BlockSizes for a given strategy - */ - virtual BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) = 0; -}; - -/** Interleaved Strategy class */ -template <typename StrategyType> -class InterleavedStrategy : public IInterleavedStrategy -{ -public: - using strategy = StrategyType; - -public: - // Inherited methods overridden - unsigned int out_height() const override - { - return strategy::out_height(); - } - std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b, - ITensor *transformed_b, - const INEGEMMWrapperKernel::Params &params, - const CPUInfo &ci) override - { - auto prepare_b = support::cpp14::make_unique<NEGEMMInterleavedPrepareBWrapperKernelTemplate<strategy>>(); - prepare_b->configure(b, transformed_b, false, ci, params); - return std::move(prepare_b); - } - std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a, - ITensor *transformed_a, - const Window &block_walker, - const INEGEMMWrapperKernel::Params &params, - const GEMMInfo &gemm_info) override - { - auto transform_a = support::cpp14::make_unique<NEGEMMInterleavedTransformAWrapperTemplate<strategy>>(); - transform_a->configure(a, transformed_a, false, gemm_info.reinterpret_input_as_3d(), block_walker, params); - return std::move(transform_a); - } - std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c, - const Window &block_walker, const BlockSizes &block_sizes, - const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info, - unsigned int num_threads) override - { - auto matrix_multiply = support::cpp14::make_unique<NEGEMMInterleavedMatrixMultiplyWrapperTemplate<strategy>>(); - matrix_multiply->configure(transformed_a, transformed_b, tmp_c, c, block_walker, block_sizes, params, gemm_info, alpha, beta, num_threads); - 
return std::move(matrix_multiply); - } - - BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) override - { - return calculate_block_sizes<strategy>(ci, params.M, params.N, params.K); - } -}; - -/** Create the backend GEMM strategy to use given the provided kernel info - * - * @param[in] kernel_name Kernel name of the backend strategy to instantiate - * - * @return The requested kernel strategy if exists else nullptr - */ -std::unique_ptr<IInterleavedStrategy> create_strategy(const std::string &kernel_name) -{ -#if defined(__arm__) - if(kernel_name.find("sgemm_8x6") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_8x6>>(); - } -#endif // defined(__arm__) -#if defined(__aarch64__) - if(kernel_name.find("gemm_s8_4x4") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_4x4>>(); - } - if(kernel_name.find("gemm_s8_12x8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_12x8>>(); - } - if(kernel_name.find("gemm_u8_4x4") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_4x4>>(); - } - if(kernel_name.find("gemm_u8_12x8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_12x8>>(); - } -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - if(kernel_name.find("hgemm_24x8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::hgemm_24x8>>(); - } -#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - if(kernel_name.find("sgemm_12x8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_12x8>>(); - } -#if defined(__ARM_FEATURE_SVE) - if(kernel_name.find("interleaved_fp16_mla_3VLx8") != std::string::npos) - { - return 
support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp16_mla_3VLx8>>(); - } - if(kernel_name.find("interleaved_fp32_mla_3VLx8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp32_mla_3VLx8>>(); - } - if(kernel_name.find("interleaved_s8s32_dot_3VLx8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_s8s32_dot_3VLx8>>(); - } - if(kernel_name.find("interleaved_u8u32_dot_3VLx8") != std::string::npos) - { - return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_u8u32_dot_3VLx8>>(); - } -#endif // defined(__ARM_FEATURE_SVE) -#endif // defined(__aarch64__)_ - return nullptr; -} -} // namespace detail -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__ */ diff --git a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp deleted file mode 100644 index ecdb5a938c..0000000000 --- a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/WindowIterator.h" - -#include "../arm_gemm/utils.hpp" -#include "arm_gemm.hpp" - -#include "../arm_gemm/mergeresults.hpp" -#include "../arm_gemm/transform.hpp" - -#include "../arm_gemm/kernels/a64_sgemm_native_16x4.hpp" - -namespace arm_compute -{ -namespace -{ -template <typename To, typename Tr> -struct Kernel -{ -}; - -#ifdef __aarch64__ -template <> -struct Kernel<float, float> -{ - using strategy = arm_gemm::sgemm_native_16x4; -}; -#endif /* __aarch64__ */ - -} // namespace - -template <typename To, typename Tr> -Window NEGEMMNativeWrapperKernel<To, Tr>::configure_internal(float alpha, float beta) -{ - ARM_COMPUTE_UNUSED(alpha); - using strategy = typename Kernel<To, Tr>::strategy; - - _beta = beta; - - //Note: The window is shifted down by 1 dimension compare to the tensors - Window window; - window.set(Window::DimX, Window::Dimension(0, ceil_to_multiple(_params.M, strategy::out_height()), strategy::out_height())); - window.set(Window::DimY, Window::Dimension(0, _params.batches)); - window.set(Window::DimZ, Window::Dimension(0, _params.multis)); - - return window; -} - -template <typename To, typename Tr> -void NEGEMMNativeWrapperKernel<To, Tr>::run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) -{ - using strategy = 
typename Kernel<To, Tr>::strategy; - - TensorAccessor<To> a(*_a); - TensorAccessor<To> b(*_b); - TensorAccessor<Tr> c(*_c); - - // Handle 3d input re-interpretation - if(_gemm_info.reinterpret_input_as_3d()) - { - Strides a_strides_as_3d = _a->info()->strides_in_bytes(); - a_strides_as_3d.remove(Window::DimZ); - a.set_strides(a_strides_as_3d); - } - - // Handle 3d output re-interpretation - if(_gemm_info.depth_output_gemm3d() != 0) - { - Strides c_strides_as_3d = _c->info()->strides_in_bytes(); - c_strides_as_3d.remove(Window::DimZ); - c.set_strides(c_strides_as_3d); - } - - unsigned int m_end = 0; - - strategy strat(info.cpu_info); - auto window_iterator = arm_compute::create_window_iterator(window, start_offset, end_offset, [&](const Coordinates & id) - { - const unsigned int y0 = id.x(); - const unsigned int batch = id.y(); - const unsigned int multi = id.z(); - const unsigned int ymax = std::min(y0 + strategy::out_height(), m_end); - - // TODO(COMPMID-1424) : Agree on gemm IO layouts - strat.kernel(a(0, y0, batch, multi), a.stride(Window::DimY), - b(0, 0, multi), b.stride(Window::DimY), - c(0, y0, batch, multi), c.stride(Window::DimY), - _beta, (ymax - y0), _params.N, _params.K); - }); - - auto on_new_row_size = [&](unsigned int start, unsigned int end) - { - ARM_COMPUTE_UNUSED(start); - m_end = std::min(end, _params.M); - }; - - window_iterator.iterate_3D(on_new_row_size); -} - -#ifdef __aarch64__ -template class NEGEMMNativeWrapperKernel<float, float>; -#endif /* __aarch64__ */ - -} // namespace arm_compute |