about | summary | refs | log | tree | commit | diff
path: root/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
diff options
context:
space:
mode:
author: Chunosov <N.Chunosov@yandex.ru> 2017-11-22 20:42:13 +0700
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:17 +0000
commit: 5124be5d1caa70964d452cf9a8cc7c67df31fa9d (patch)
tree: 77d74963e9c3f52050cbc264a692133395182e98 /src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
parent: 9873ea3f1ea238ba7abfb635807614517c52be4b (diff)
download: ComputeLibrary-5124be5d1caa70964d452cf9a8cc7c67df31fa9d.tar.gz
COMPMID-661: Convolution quantized (#32)
Change-Id: Id69df4ce98d1d89bdf9c9aa5c4d909659909b30f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110456
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp')
-rw-r--r-- src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 39
1 files changed, 26 insertions, 13 deletions
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 5d2d13e243..5c6f5b4ed0 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -35,11 +35,11 @@ using namespace arm_compute;
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(),
- _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _a_offset(0), _b_offset(0), _is_interleaved_transposed(true)
+ _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _a_offset(0), _b_offset(0), _is_interleaved_transposed(true), _is_first_run(true), _reshape_b_only_on_first_run(false)
{
}
-void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output)
+void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, const GEMMInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
@@ -47,9 +47,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(0) != (b)->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(1) != (output)->info()->dimension(1), "The output matrix must have the same number of rows as the matrix A");
ARM_COMPUTE_ERROR_ON_MSG((b)->info()->dimension(0) != (output)->info()->dimension(0), "The output matrix must have the same number of columns as the matrix B");
+ ARM_COMPUTE_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
+ ARM_COMPUTE_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+ _a_offset = a->info()->quantization_info().offset;
+ _b_offset = b->info()->quantization_info().offset;
// If the input tensor has less than 16 rows, we run a special version of GEMMLowp without reshaping the input tensors
_is_interleaved_transposed = a->info()->dimension(1) > 16;
@@ -93,7 +96,8 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
if(_a_offset != 0)
{
TensorShape shape_vector_sum_col = b->info()->tensor_shape();
- if(b->info()->num_dimensions() > 1)
+
+ if(shape_vector_sum_col.num_dimensions() > 1)
{
shape_vector_sum_col.remove_dimension(1);
}
@@ -152,8 +156,21 @@ void CLGEMMLowpMatrixMultiplyCore::run()
// Run reshape matrix A
CLScheduler::get().enqueue(_mtx_a_reshape_kernel, false);
- // Run reshape matrix B
- CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ if(_is_first_run || !_reshape_b_only_on_first_run)
+ {
+ // Run reshape matrix B
+ CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ }
+ }
+
+ // Note: if _reshape_b_only_on_first_run = true, the reduction kernel can be executed only once
+ if(_is_first_run || !_reshape_b_only_on_first_run)
+ {
+ // Run matrix B reduction kernel only if _a_offset is not equal to 0
+ if(_a_offset != 0)
+ {
+ CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
+ }
}
// Run matrix multiply
@@ -165,14 +182,10 @@ void CLGEMMLowpMatrixMultiplyCore::run()
CLScheduler::get().enqueue(_mtx_a_reduction_kernel, false);
}
- // Run matrix B reduction kernel only if _a_offset is not equal to 0
- if(_a_offset != 0)
- {
- CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
- }
-
// Run offset contribution kernel
CLScheduler::get().enqueue(_offset_contribution_kernel, true);
_memory_group.release();
+
+ _is_first_run = false;
}