aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/assembly
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-06-21 18:43:12 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-07-05 15:30:24 +0000
commit37d080f2f11cfd734104b76512e1fb191486216e (patch)
treed5df067c826aacc0676e7e9557a54b61a9a3b7eb /src/core/NEON/kernels/assembly
parent11de30da8a9f79943255ddba7bb70a66b076673b (diff)
downloadComputeLibrary-37d080f2f11cfd734104b76512e1fb191486216e.tar.gz
COMPMID-2378: Sanitize GEMM configuration for NEON
Change-Id: I7859b82b2059e14685f8792424648ac5eacd67f1 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/1418 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/assembly')
-rw-r--r--src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp25
-rw-r--r--src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h37
-rw-r--r--src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp18
3 files changed, 50 insertions, 30 deletions
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
index 0fc3610014..d00f204b81 100644
--- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
+++ b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,11 +33,11 @@
using namespace arm_compute;
INEGEMMWrapperKernel::INEGEMMWrapperKernel()
- : _a(nullptr), _b(nullptr), _c(nullptr), _params(), _window3d(), _window_shape()
+ : _a(nullptr), _b(nullptr), _c(nullptr), _params(), _gemm_info(), _window3d(), _window_shape()
{
}
-INEGEMMWrapperKernel::Params INEGEMMWrapperKernel::extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c)
+INEGEMMWrapperKernel::Params INEGEMMWrapperKernel::extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info)
{
Params p;
@@ -45,21 +45,30 @@ INEGEMMWrapperKernel::Params INEGEMMWrapperKernel::extract_parameters(const ITen
ARM_COMPUTE_ERROR_ON_NULLPTR(b);
ARM_COMPUTE_ERROR_ON_NULLPTR(c);
+ // Initalize params
p.M = c->info()->tensor_shape().y();
p.N = c->info()->tensor_shape().x();
p.K = a->info()->tensor_shape().x();
p.multis = b->info()->tensor_shape().z();
p.batches = c->info()->tensor_shape().total_size_upper(2) / p.multis; //COMPMID-1423: Agree on and document the layout of gemm inputs/outputs
+ // Update M in case of GEMM3D for output
+ if(gemm_info.depth_output_gemm3d() != 0)
+ {
+ p.M = c->info()->tensor_shape().y() * c->info()->tensor_shape().z();
+ p.batches = c->info()->tensor_shape().total_size_upper(3) / p.multis;
+ }
+
return p;
}
-void INEGEMMWrapperKernel::configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta)
+void INEGEMMWrapperKernel::configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info)
{
- _params = extract_parameters(a, b, c);
- _a = a;
- _b = b;
- _c = c;
+ _gemm_info = gemm_info;
+ _params = extract_parameters(a, b, c, gemm_info);
+ _a = a;
+ _b = b;
+ _c = c;
_window3d = configure_internal(alpha, beta);
_window_shape = _window3d.shape();
diff --git a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
index 26d9e9999d..6e30148b5d 100644
--- a/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
+++ b/src/core/NEON/kernels/assembly/NEGEMMInterleavedStrategies.h
@@ -76,32 +76,34 @@ public:
* @param[in] transformed_a Reshaped tensor A.
* @param[in] block_walker Window representing the layout of the matrix's blocks.
* @param[in] params M, N, K sizes.
+ * @param[in] gemm_info GEMM meta-data
*
* @return A wrapped specialized transformA kernel
*/
virtual std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
ITensor *transformed_a,
const Window &block_walker,
- const INEGEMMWrapperKernel::Params &params) = 0;
+ const INEGEMMWrapperKernel::Params &params,
+ const GEMMInfo &gemm_info) = 0;
/** Instantiate and configure a prepareB Kernel
*
- * @param transformed_a Already reshaped tensor A.
- * @param transformed_b Already reshaped tensor B.
- * @param tmp_c Temporary buffer to be used to store intermediate results.
- * @param c Result tensor C.
- * @param block_walker Window containing iteration information for the M and batch dimensions.
- * @param block_sizes Block sizes to use for the matrix multiplication (A & B must have been reshaped using these same block sizes).
- * @param params M, N, K sizes.
- * @param alpha Alpha value
- * @param beta Beta value
- * @param pretranspose_b Is B also pretransposed ?
- * @param num_threads Maximum number of threads that might be used for the calculations.
+ * @param[in] transformed_a Already reshaped tensor A.
+ * @param[in] transformed_b Already reshaped tensor B.
+ * @param[in] tmp_c Temporary buffer to be used to store intermediate results.
+ * @param[in] c Result tensor C.
+ * @param[in] block_walker Window containing iteration information for the M and batch dimensions.
+ * @param[in] block_sizes Block sizes to use for the matrix multiplication (A & B must have been reshaped using these same block sizes).
+ * @param[in] params M, N, K sizes.
+ * @param[in] alpha Alpha value
+ * @param[in] beta Beta value
+ * @param[in] gemm_info GEMM meta-data
+ * @param[in] num_threads Maximum number of threads that might be used for the calculations.
*
* @return A wrapped specialized MatrixMultiply kernel
*/
virtual std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
const Window &block_walker, const BlockSizes &block_sizes,
- const INEGEMMWrapperKernel::Params &params, float alpha, float beta, bool pretranspose_b,
+ const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info,
unsigned int num_threads) = 0;
/** Calculates the block sizes of a given strategy
*
@@ -138,19 +140,20 @@ public:
std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
ITensor *transformed_a,
const Window &block_walker,
- const INEGEMMWrapperKernel::Params &params) override
+ const INEGEMMWrapperKernel::Params &params,
+ const GEMMInfo &gemm_info) override
{
auto transform_a = support::cpp14::make_unique<NEGEMMInterleavedTransformAWrapperTemplate<strategy>>();
- transform_a->configure(a, transformed_a, false, block_walker, params);
+ transform_a->configure(a, transformed_a, false, gemm_info.reinterpret_input_as_3d(), block_walker, params);
return std::move(transform_a);
}
std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
const Window &block_walker, const BlockSizes &block_sizes,
- const INEGEMMWrapperKernel::Params &params, float alpha, float beta, bool pretranspose_b,
+ const INEGEMMWrapperKernel::Params &params, float alpha, float beta, const GEMMInfo &gemm_info,
unsigned int num_threads) override
{
auto matrix_multiply = support::cpp14::make_unique<NEGEMMInterleavedMatrixMultiplyWrapperTemplate<strategy>>();
- matrix_multiply->configure(transformed_a, transformed_b, tmp_c, c, block_walker, block_sizes, params, pretranspose_b, alpha, beta, num_threads);
+ matrix_multiply->configure(transformed_a, transformed_b, tmp_c, c, block_walker, block_sizes, params, gemm_info, alpha, beta, num_threads);
return std::move(matrix_multiply);
}
diff --git a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
index 97c20dbd4e..ecdb5a938c 100644
--- a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
+++ b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
@@ -81,12 +81,20 @@ void NEGEMMNativeWrapperKernel<To, Tr>::run_internal(const Window &window, const
TensorAccessor<To> b(*_b);
TensorAccessor<Tr> c(*_c);
- if(_a->info()->data_layout() == DataLayout::NHWC)
+ // Handle 3d input re-interpretation
+ if(_gemm_info.reinterpret_input_as_3d())
{
- // In the case of NHWC we want to interpret the output shape as 3D. Thus, the batch stride for A is
- // the relevant multiple of the row stride.
- const size_t nhwc_batch_stride = _a->info()->strides_in_bytes().y() * _c->info()->dimension(1);
- a.set_stride(2, nhwc_batch_stride);
+ Strides a_strides_as_3d = _a->info()->strides_in_bytes();
+ a_strides_as_3d.remove(Window::DimZ);
+ a.set_strides(a_strides_as_3d);
+ }
+
+ // Handle 3d output re-interpretation
+ if(_gemm_info.depth_output_gemm3d() != 0)
+ {
+ Strides c_strides_as_3d = _c->info()->strides_in_bytes();
+ c_strides_as_3d.remove(Window::DimZ);
+ c.set_strides(c_strides_as_3d);
}
unsigned int m_end = 0;