aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLGEMM.cpp
diff options
context:
space:
mode:
authorIsabella Gottardi <isabella.gottardi@arm.com>2018-03-01 16:42:00 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:53:09 +0000
commit8e74f4488daf1b628ca718396d5fc72fea95a83d (patch)
treef372c61aab423799f82ea7a98aa6a157a4887bdc /src/runtime/CL/functions/CLGEMM.cpp
parent0a887922c73bbe7c5d42b1eb3ae55730f0d9a139 (diff)
downloadComputeLibrary-8e74f4488daf1b628ca718396d5fc72fea95a83d.tar.gz
COMPMID-911: Allow GEMM to work with 3D tensors
Change-Id: I8c4823a0d909e19e9ef548f00b9ae98c66de61dd Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/123569 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLGEMM.cpp')
-rw-r--r--src/runtime/CL/functions/CLGEMM.cpp9
1 files changed, 3 insertions, 6 deletions
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index a0ec66f804..f9713bb586 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -24,10 +24,6 @@
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Helpers.h"
@@ -111,6 +107,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
const int m = a->info()->dimension(1);
const int n = b->info()->dimension(0);
const int k = a->info()->dimension(0);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
int mult_transpose1xW_width = 1;
int mult_interleave4x4_height = 1;
@@ -144,7 +141,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
}
// Configure and tune matrix multiply kernel
- _mm_kernel.configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed, GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height));
+ _mm_kernel.configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed, GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d));
CLScheduler::get().tune_kernel_static(_mm_kernel);
if(_is_interleaved_transposed)
@@ -197,7 +194,7 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
mult_interleave4x4_height = 2;
}
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height);
+ const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, gemm_info.depth_output_gemm3d());
// Check if we need to reshape the matrix A and matrix B
const bool run_interleave_transpose = is_interleaved_transposed(m, n, k, a->data_type(), reshape_b_only_on_first_run, gpu_target);