about summary refs log tree commit diff
path: root/src/core/NEON/kernels/assembly
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-14 19:03:09 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-23 12:08:12 +0000
commit 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
tree f857d733ccf446c704823dc7ac796a96eb55095e /src/core/NEON/kernels/assembly
parent 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
download ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/assembly')
-rw-r--r-- src/core/NEON/kernels/assembly/Helpers.cpp 13
-rw-r--r-- src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h 230
-rw-r--r-- src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp 130
3 files changed, 5 insertions, 368 deletions
diff --git a/src/core/NEON/kernels/assembly/Helpers.cpp b/src/core/NEON/kernels/assembly/Helpers.cpp
index 3d8d66d7fc..93ea6c8d5e 100644
--- a/src/core/NEON/kernels/assembly/Helpers.cpp
+++ b/src/core/NEON/kernels/assembly/Helpers.cpp
@@ -24,16 +24,13 @@
#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
-
namespace arm_compute
{
arm_gemm::KernelDescription get_gemm_info(DataType input_type,
const CPUInfo &ci,
const unsigned int num_threads,
const INEGEMMWrapperKernel::Params &p,
- float alpha,
- float beta,
+ arm_gemm::Activation activation,
bool pretranspose_hint)
{
switch(input_type)
@@ -42,25 +39,25 @@ arm_gemm::KernelDescription get_gemm_info(DataType in
case DataType::QASYMM8:
case DataType::U8:
{
- arm_gemm::GemmArgs<uint32_t> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint);
+ arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint);
return arm_gemm::get_gemm_method<uint8_t, uint32_t>(args);
}
case DataType::S8:
{
- arm_gemm::GemmArgs<int32_t> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint);
+ arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint);
return arm_gemm::get_gemm_method<int8_t, int32_t>(args);
}
#endif // __aarch64__
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
- arm_gemm::GemmArgs<__fp16> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint);
+ arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint);
return arm_gemm::get_gemm_method<__fp16, __fp16>(args);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::F32:
{
- arm_gemm::GemmArgs<float> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, pretranspose_hint);
+ arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, pretranspose_hint);
return arm_gemm::get_gemm_method<float, float>(args);
}
default:
diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
deleted file mode 100644
index 6e30148b5d..0000000000
--- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__
-#define __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__
-
-#include "../arm_gemm/utils.hpp"
-#include "arm_gemm.hpp"
-
-#include "../arm_gemm/mergeresults.hpp"
-#include "../arm_gemm/transform.hpp"
-
-#include "../arm_gemm/kernels/a32_sgemm_8x6.hpp"
-#include "../arm_gemm/kernels/a64_gemm_s8_12x8.hpp"
-#include "../arm_gemm/kernels/a64_gemm_s8_4x4.hpp"
-#include "../arm_gemm/kernels/a64_gemm_u8_12x8.hpp"
-#include "../arm_gemm/kernels/a64_gemm_u8_4x4.hpp"
-#include "../arm_gemm/kernels/a64_hgemm_24x8.hpp"
-#include "../arm_gemm/kernels/a64_sgemm_12x8.hpp"
-#include "../arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp"
-#include "../arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp"
-#include "../arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp"
-#include "../arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp"
-
-namespace arm_compute
-{
-namespace detail
-{
-/** GEMM Interleaved Strategy interface */
-class IInterleavedStrategy
-{
-public:
- /** Virtual Destructor */
- virtual ~IInterleavedStrategy() = default;
- /** Return output height of the interleaved strategy
- *
- * @return Output height of strategy
- */
- virtual unsigned int out_height() const = 0;
- /** Instantiate and configure a prepareB Kernel
- *
- * @param[in] b Input tensor B.
- * @param[in] transformed_b Reshaped tensor B.
- * @param[in] params GM, N, K sizes.
- * @param[in] ci CPUInfo to be used for kernel configuration.
- *
- * @return A wrapped specialized prepareB kernel
- */
- virtual std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b,
- ITensor *transformed_b,
- const INEGEMMWrapperKernel::Params &params,
- const CPUInfo &ci) = 0;
- /** Instantiate and configure a transformA Kernel
- *
- * @param[in] a Input tensor A.
- * @param[in] transformed_a Reshaped tensor A.
- * @param[in] block_walker Window representing the layout of the matrix's blocks.
- * @param[in] params M, N, K sizes.
- * @param[in] gemm_info GEMM meta-data
- *
- * @return A wrapped specialized transformA kernel
- */
- virtual std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
- ITensor *transformed_a,
- const Window &block_walker,
- const INEGEMMWrapperKernel::Params &params,
- const GEMMInfo &gemm_info) = 0;
- /** Instantiate and configure a prepareB Kernel
- *
- * @param[in] transformed_a Already reshaped tensor A.
- * @param[in] transformed_b Already reshaped tensor B.
- * @param[in] tmp_c Temporary buffer to be used to store intermediate results.
- * @param[in] c Result tensor C.
- * @param[in] block_walker Window containing iteration information for the M and batch dimensions.
- * @param[in] block_sizes Block sizes to use for the matrix multiplication (A & B must have been reshaped using these same block sizes).
- * @param[in] params M, N, K sizes.
- * @param[in] alpha Alpha value
- * @param[in] beta Beta value
- * @param[in] gemm_info GEMM meta-data
- * @param[in] num_threads Maximum number of threads that might be used for the calculations.
- *
- * @return A wrapped specialized MatrixMultiply kernel
- */
- virtual std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
- const Window &block_walker, const BlockSizes &block_sizes,
- const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info,
- unsigned int num_threads) = 0;
- /** Calculates the block sizes of a given strategy
- *
- * @param[in] ci CPUInfo to be used for kernel configuration.
- * @param[in] params M, N, K sizes.
- *
- * @return BlockSizes for a given strategy
- */
- virtual BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) = 0;
-};
-
-/** Interleaved Strategy class */
-template <typename StrategyType>
-class InterleavedStrategy : public IInterleavedStrategy
-{
-public:
- using strategy = StrategyType;
-
-public:
- // Inherited methods overridden
- unsigned int out_height() const override
- {
- return strategy::out_height();
- }
- std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b,
- ITensor *transformed_b,
- const INEGEMMWrapperKernel::Params &params,
- const CPUInfo &ci) override
- {
- auto prepare_b = support::cpp14::make_unique<NEGEMMInterleavedPrepareBWrapperKernelTemplate<strategy>>();
- prepare_b->configure(b, transformed_b, false, ci, params);
- return std::move(prepare_b);
- }
- std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
- ITensor *transformed_a,
- const Window &block_walker,
- const INEGEMMWrapperKernel::Params &params,
- const GEMMInfo &gemm_info) override
- {
- auto transform_a = support::cpp14::make_unique<NEGEMMInterleavedTransformAWrapperTemplate<strategy>>();
- transform_a->configure(a, transformed_a, false, gemm_info.reinterpret_input_as_3d(), block_walker, params);
- return std::move(transform_a);
- }
- std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
- const Window &block_walker, const BlockSizes &block_sizes,
- const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info,
- unsigned int num_threads) override
- {
- auto matrix_multiply = support::cpp14::make_unique<NEGEMMInterleavedMatrixMultiplyWrapperTemplate<strategy>>();
- matrix_multiply->configure(transformed_a, transformed_b, tmp_c, c, block_walker, block_sizes, params, gemm_info, alpha, beta, num_threads);
- return std::move(matrix_multiply);
- }
-
- BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) override
- {
- return calculate_block_sizes<strategy>(ci, params.M, params.N, params.K);
- }
-};
-
-/** Create the backend GEMM strategy to use given the provided kernel info
- *
- * @param[in] kernel_name Kernel name of the backend strategy to instantiate
- *
- * @return The requested kernel strategy if exists else nullptr
- */
-std::unique_ptr<IInterleavedStrategy> create_strategy(const std::string &kernel_name)
-{
-#if defined(__arm__)
- if(kernel_name.find("sgemm_8x6") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_8x6>>();
- }
-#endif // defined(__arm__)
-#if defined(__aarch64__)
- if(kernel_name.find("gemm_s8_4x4") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_4x4>>();
- }
- if(kernel_name.find("gemm_s8_12x8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_12x8>>();
- }
- if(kernel_name.find("gemm_u8_4x4") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_4x4>>();
- }
- if(kernel_name.find("gemm_u8_12x8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_12x8>>();
- }
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
- if(kernel_name.find("hgemm_24x8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::hgemm_24x8>>();
- }
-#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
- if(kernel_name.find("sgemm_12x8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_12x8>>();
- }
-#if defined(__ARM_FEATURE_SVE)
- if(kernel_name.find("interleaved_fp16_mla_3VLx8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp16_mla_3VLx8>>();
- }
- if(kernel_name.find("interleaved_fp32_mla_3VLx8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp32_mla_3VLx8>>();
- }
- if(kernel_name.find("interleaved_s8s32_dot_3VLx8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_s8s32_dot_3VLx8>>();
- }
- if(kernel_name.find("interleaved_u8u32_dot_3VLx8") != std::string::npos)
- {
- return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_u8u32_dot_3VLx8>>();
- }
-#endif // defined(__ARM_FEATURE_SVE)
-#endif // defined(__aarch64__)_
- return nullptr;
-}
-} // namespace detail
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__ */
diff --git a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
deleted file mode 100644
index ecdb5a938c..0000000000
--- a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.h"
-
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/WindowIterator.h"
-
-#include "../arm_gemm/utils.hpp"
-#include "arm_gemm.hpp"
-
-#include "../arm_gemm/mergeresults.hpp"
-#include "../arm_gemm/transform.hpp"
-
-#include "../arm_gemm/kernels/a64_sgemm_native_16x4.hpp"
-
-namespace arm_compute
-{
-namespace
-{
-template <typename To, typename Tr>
-struct Kernel
-{
-};
-
-#ifdef __aarch64__
-template <>
-struct Kernel<float, float>
-{
- using strategy = arm_gemm::sgemm_native_16x4;
-};
-#endif /* __aarch64__ */
-
-} // namespace
-
-template <typename To, typename Tr>
-Window NEGEMMNativeWrapperKernel<To, Tr>::configure_internal(float alpha, float beta)
-{
- ARM_COMPUTE_UNUSED(alpha);
- using strategy = typename Kernel<To, Tr>::strategy;
-
- _beta = beta;
-
- //Note: The window is shifted down by 1 dimension compare to the tensors
- Window window;
- window.set(Window::DimX, Window::Dimension(0, ceil_to_multiple(_params.M, strategy::out_height()), strategy::out_height()));
- window.set(Window::DimY, Window::Dimension(0, _params.batches));
- window.set(Window::DimZ, Window::Dimension(0, _params.multis));
-
- return window;
-}
-
-template <typename To, typename Tr>
-void NEGEMMNativeWrapperKernel<To, Tr>::run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info)
-{
- using strategy = typename Kernel<To, Tr>::strategy;
-
- TensorAccessor<To> a(*_a);
- TensorAccessor<To> b(*_b);
- TensorAccessor<Tr> c(*_c);
-
- // Handle 3d input re-interpretation
- if(_gemm_info.reinterpret_input_as_3d())
- {
- Strides a_strides_as_3d = _a->info()->strides_in_bytes();
- a_strides_as_3d.remove(Window::DimZ);
- a.set_strides(a_strides_as_3d);
- }
-
- // Handle 3d output re-interpretation
- if(_gemm_info.depth_output_gemm3d() != 0)
- {
- Strides c_strides_as_3d = _c->info()->strides_in_bytes();
- c_strides_as_3d.remove(Window::DimZ);
- c.set_strides(c_strides_as_3d);
- }
-
- unsigned int m_end = 0;
-
- strategy strat(info.cpu_info);
- auto window_iterator = arm_compute::create_window_iterator(window, start_offset, end_offset, [&](const Coordinates & id)
- {
- const unsigned int y0 = id.x();
- const unsigned int batch = id.y();
- const unsigned int multi = id.z();
- const unsigned int ymax = std::min(y0 + strategy::out_height(), m_end);
-
- // TODO(COMPMID-1424) : Agree on gemm IO layouts
- strat.kernel(a(0, y0, batch, multi), a.stride(Window::DimY),
- b(0, 0, multi), b.stride(Window::DimY),
- c(0, y0, batch, multi), c.stride(Window::DimY),
- _beta, (ymax - y0), _params.N, _params.K);
- });
-
- auto on_new_row_size = [&](unsigned int start, unsigned int end)
- {
- ARM_COMPUTE_UNUSED(start);
- m_end = std::min(end, _params.M);
- };
-
- window_iterator.iterate_3D(on_new_row_size);
-}
-
-#ifdef __aarch64__
-template class NEGEMMNativeWrapperKernel<float, float>;
-#endif /* __aarch64__ */
-
-} // namespace arm_compute