aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
diff options
context:
space:
mode:
author: Anthony Barbier <anthony.barbier@arm.com> 2017-12-12 11:27:55 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: 93b9bdb49f0f1e715c7ad251b6886c1a49945b5a (patch)
tree: c31436a3c8f634fb22bd30a9ee25f8788fbdb69c /src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
parent: 40626803bc2df7cda8a86e947636e9b950726384 (diff)
download: ComputeLibrary-93b9bdb49f0f1e715c7ad251b6886c1a49945b5a.tar.gz
COMPMID-743 Fixed AccessWindow in GEMMLowpMatrixMultiply
Change-Id: I8c9ae389756aa8ca346ad1ebfa1feac9a47964a5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112863
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp 7
1 files changed, 5 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
index d22773d5e0..9104f0b98a 100644
--- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
@@ -743,6 +743,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
ARM_COMPUTE_RETURN_ERROR_ON_MSG(in0_shape[2] != out_shape[2], "Output tensor must have the same number of batches of input0 tensor");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_shape[2] != 1 && in0_shape[2] != in1_shape[2], "Input1 tensor must have the same number of batches of input0 or the number of batches must be set to 1");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_shape[0] % 16, "Input1's width must be a multiple of 16");
}
return Status{};
@@ -777,8 +778,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
{
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- AccessWindowStatic in0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), input0->dimension(1));
- AccessWindowHorizontal in1_access(input1, 0, num_elems_processed_per_iteration_x);
+ unsigned int num_k_iterations = ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x) / 16;
+ // For each iteration of "k" we increment the input pointer by 4, and we load 8 elements at a time:
+ AccessWindowStatic in0_access(input0, 0, 0, (num_k_iterations - 1) * 4 + 8, input0->dimension(1));
+ AccessWindowHorizontal in1_access(input1, 0, input1->dimension(0));
AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
window_changed = update_window_and_padding(win, in0_access, in1_access, output_access);