diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-06-25 17:18:36 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-06-26 10:15:10 +0000 |
commit | e5563d9b0102846973f144cba42fb9002bebd09b (patch) | |
tree | 3ede792d30aad726a81b371e34bae16f30f5d81c /src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | |
parent | 6cb26ce7ff35e0c9b634160603560feeb23b0cee (diff) | |
download | ComputeLibrary-e5563d9b0102846973f144cba42fb9002bebd09b.tar.gz |
COMPMID-3560: Fix F16 performance regression (OpenCL)
The performance regression was caused by a change in the interface
of the gemm_mm_reshaped_lhs_* OpenCL kernels.
Change-Id: I030df4975dc040886c17e71710a27137b50edd9b
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3465
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
index ba1c8a9d14..22bde635e6 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
@@ -225,7 +225,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
 
 CLGEMMMatrixMultiplyReshapedKernel::CLGEMMMatrixMultiplyReshapedKernel()
     : _input0(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _slide_matrix_b(true), _reinterpret_output_as_3d(false), _use_dummy_work_items(false), _add_bias(false),
-      _broadcast_bias(false), _export_to_cl_image(false)
+      _broadcast_bias(false), _export_to_cl_image(false), _k(1)
 {
 }
 
@@ -254,6 +254,7 @@ void CLGEMMMatrixMultiplyReshapedKernel::configure(const CLCompileContext &compi
     _add_bias = _input2 != nullptr;
     _broadcast_bias = gemm_info.broadcast_bias;
     _export_to_cl_image = rhs_info.export_to_cl_image;
+    _k = gemm_info.k;
 
     // Check if we need to slide the matrix B
     const unsigned int num_dimensions_input0 = _input0->info()->num_dimensions();
@@ -435,6 +436,9 @@ void CLGEMMMatrixMultiplyReshapedKernel::run(const Window &window, cl::CommandQu
 
     // Output buffer
     add_2D_tensor_argument(idx, _output, slice);
 
+    // K dimension (not used if _export_to_cl_image == true)
+    _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_k));
+
     // LHS stride_z
     _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
```