diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-12-12 11:27:55 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:33 +0000 |
commit | 93b9bdb49f0f1e715c7ad251b6886c1a49945b5a (patch) | |
tree | c31436a3c8f634fb22bd30a9ee25f8788fbdb69c | |
parent | 40626803bc2df7cda8a86e947636e9b950726384 (diff) | |
download | ComputeLibrary-93b9bdb49f0f1e715c7ad251b6886c1a49945b5a.tar.gz |
COMPMID-743 Fixed AccessWindow in GEMMLowpMatrixMultiply
Change-Id: I8c9ae389756aa8ca346ad1ebfa1feac9a47964a5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112863
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- | src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index d22773d5e0..9104f0b98a 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -743,6 +743,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, ARM_COMPUTE_RETURN_ERROR_ON_MSG(in0_shape[2] != out_shape[2], "Output tensor must have the same number of batches of input0 tensor"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_shape[2] != 1 && in0_shape[2] != in1_shape[2], "Input1 tensor must have the same number of batches of input0 or the number of batches must be set to 1"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_shape[0] % 16, "Input1's width must be a multiple of 16"); } return Status{}; @@ -777,8 +778,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe { win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - AccessWindowStatic in0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), input0->dimension(1)); - AccessWindowHorizontal in1_access(input1, 0, num_elems_processed_per_iteration_x); + unsigned int num_k_iterations = ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x) / 16; + // For each iteration of "k" we increment the input pointer by 4, and we load 8 elements at a time: + AccessWindowStatic in0_access(input0, 0, 0, (num_k_iterations - 1) * 4 + 8, input0->dimension(1)); + AccessWindowHorizontal in1_access(input1, 0, input1->dimension(0)); AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); window_changed = update_window_and_padding(win, in0_access, in1_access, output_access); |