diff options
author | Pablo Tello <pablo.tello@arm.com> | 2018-05-03 10:42:35 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:50:48 +0000 |
commit | 6c4212789a530c3655258779219c4ed7f0397b86 (patch) | |
tree | d71d57c8c44ebcf00651f0cd1487f4d6ab19f9e3 | |
parent | 7f60f7e287ef5c53a48c6c5ee4e73a3437b81c12 (diff) | |
download | ComputeLibrary-6c4212789a530c3655258779219c4ed7f0397b86.tar.gz |
COMPMID-1105: Fix mismatches conv layer when using NativeGemm with multiple threads
Change-Id: Id5ba16a7e3382070fda936c63d174df53596da04
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129964
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_native.hpp | 5 | ||||
-rw-r--r-- | tests/datasets/SmallConvolutionLayerDataset.h | 2 |
2 files changed, 4 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_native.hpp b/src/core/NEON/kernels/arm_gemm/gemm_native.hpp
index beecb76f20..21861eb8b6 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_native.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_native.hpp
@@ -87,8 +87,7 @@ public:
 #ifdef CYCLE_PROFILING
         profiler prof;
 #endif
-        strategy strat(_ci);
-
+        strategy strat(_ci);
 
         const unsigned int window_per_batch = iceildiv(_Msize, strategy::out_height);
         const unsigned int window_per_multi = window_per_batch * _nbatches;
@@ -109,7 +108,7 @@ public:
             const unsigned int batch_0 = (multi == first_multi) ? first_batch : 0;
             const unsigned int batch_max = (multi == last_multi) ? last_batch : _nbatches;
 
-            for(unsigned int batch = batch_0; batch < batch_max; batch++)
+            for(unsigned int batch = batch_0; batch <= batch_max; batch++)
             {
                 const unsigned int m_start = ((multi == first_multi) && (batch == first_batch)) ? first_row : 0;
                 const unsigned int m_end = ((multi == last_multi) && (batch == last_batch)) ? last_row : _Msize;
diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h
index 696c396eef..8e34f0ab1a 100644
--- a/tests/datasets/SmallConvolutionLayerDataset.h
+++ b/tests/datasets/SmallConvolutionLayerDataset.h
@@ -66,6 +66,8 @@ class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset
 public:
     SmallConvolutionLayerDataset()
     {
+        add_config(TensorShape(224U, 224U, 3U), TensorShape(3U, 3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U),
+                   PadStrideInfo(2, 2, /*left*/ 0, /*right*/ 1, /*top*/ 0, /*bottom*/ 1, DimensionRoundingType::FLOOR));
         // Batch size 1
         add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(11U, 25U, 21U), PadStrideInfo(2, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0));