aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators/ClFullyConnected.h
diff options
context:
space:
mode:
author: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> 2023-06-19 14:57:57 +0100
committer: Mohmun02 <MohammedSuhail.Munshi@arm.com> 2023-06-26 11:34:03 +0000
commit: a2bb80ea7111509c24caad8629533089decef430 (patch)
tree: f674572e0cc705af9b66633bfcd9d6ad9e29d970 /src/gpu/cl/operators/ClFullyConnected.h
parent: c952596e70f2fe0073029f053e329a4e930ced8c (diff)
download: ComputeLibrary-a2bb80ea7111509c24caad8629533089decef430.tar.gz
Use MatMul in fully connected layer with dynamic weights when supported
- Use MatMul kernels in FC layer when using dynamic weights without broadcasting or bias.
- Fix minor typo in IClMatMulNativeKernelConfig.h

Partially Resolves : [COMPMID-6193]

Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Id494062b5b4f4e75ff9714c202dde941955afa52
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9797
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClFullyConnected.h')
-rw-r--r-- src/gpu/cl/operators/ClFullyConnected.h 19
1 files changed, 16 insertions, 3 deletions
diff --git a/src/gpu/cl/operators/ClFullyConnected.h b/src/gpu/cl/operators/ClFullyConnected.h
index 11a59b2359..5dc68c1bbe 100644
--- a/src/gpu/cl/operators/ClFullyConnected.h
+++ b/src/gpu/cl/operators/ClFullyConnected.h
@@ -42,7 +42,12 @@ class ClFlatten;
class ClGemm;
class ClGemmLowpMatrixMultiplyCore;
class ClTranspose;
-
+// Kernel Forward Declarations
+namespace kernels
+{
+class ClMatMulNativeKernel;
+class ClMatMulLowpNativeKernel;
+}
/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
*
* -# @ref opencl::kernels::ClIm2ColKernel (called when the input comes from a convolutional layer)
@@ -119,12 +124,19 @@ private:
std::unique_ptr<ClGemm> _mm_gemm;
std::unique_ptr<ClGemmLowpMatrixMultiplyCore> _mm_gemmlowp;
+ std::unique_ptr<kernels::ClMatMulNativeKernel> _matmul_native_kernel;
+ std::unique_ptr<kernels::ClMatMulLowpNativeKernel> _matmul_lowp_native_kernel;
+
experimental::MemoryRequirements _aux_mem{};
TensorInfo _flattened_src{};
TensorInfo _converted_weights{};
TensorInfo _reshaped_weights{};
+ // Saved tensor shapes for reshaping when using matmul
+ TensorShape _lhs_shape_original{};
+ TensorInfo _lhs_to_use{};
+
TensorInfo _weights_to_use{};
int _weights_to_use_idx{ ACL_SRC_1 };
@@ -134,10 +146,11 @@ private:
bool _is_quantized{ false };
bool _is_prepared{ false };
bool _dynamic_weights{ false };
+ bool _use_matmul{ false };
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
- int _asrt_run_count{};
- int _asrt_prepare_count{};
+ int _asrt_run_count {};
+ int _asrt_prepare_count{};
#endif // ARM_COMPUTE_ASSERTS_ENABLED
};
} // namespace opencl