diff options
author | Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> | 2023-06-19 14:57:57 +0100 |
---|---|---|
committer | Mohmun02 <MohammedSuhail.Munshi@arm.com> | 2023-06-26 11:34:03 +0000 |
commit | a2bb80ea7111509c24caad8629533089decef430 (patch) | |
tree | f674572e0cc705af9b66633bfcd9d6ad9e29d970 /src/gpu/cl/operators/ClFullyConnected.h | |
parent | c952596e70f2fe0073029f053e329a4e930ced8c (diff) | |
download | ComputeLibrary-a2bb80ea7111509c24caad8629533089decef430.tar.gz |
Use MatMul in fully connected layer with dynamic weights when supported
- Use MatMul kernels in FC layer when using dynamic weights without broadcasting or bias.
- Fix minor typo in IClMatMulNativeKernelConfig.h
Partially Resolves : [COMPMID-6193]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Id494062b5b4f4e75ff9714c202dde941955afa52
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9797
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClFullyConnected.h')
-rw-r--r-- | src/gpu/cl/operators/ClFullyConnected.h | 19 |
1 files changed, 16 insertions, 3 deletions
```diff
diff --git a/src/gpu/cl/operators/ClFullyConnected.h b/src/gpu/cl/operators/ClFullyConnected.h
index 11a59b2359..5dc68c1bbe 100644
--- a/src/gpu/cl/operators/ClFullyConnected.h
+++ b/src/gpu/cl/operators/ClFullyConnected.h
@@ -42,7 +42,12 @@ class ClFlatten;
 class ClGemm;
 class ClGemmLowpMatrixMultiplyCore;
 class ClTranspose;
-
+// Kernel Forward Declarations
+namespace kernels
+{
+class ClMatMulNativeKernel;
+class ClMatMulLowpNativeKernel;
+}
 /** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
  *
  * -# @ref opencl::kernels::ClIm2ColKernel (called when the input comes from a convolutional layer)
@@ -119,12 +124,19 @@ private:
     std::unique_ptr<ClGemm> _mm_gemm;
     std::unique_ptr<ClGemmLowpMatrixMultiplyCore> _mm_gemmlowp;
 
+    std::unique_ptr<kernels::ClMatMulNativeKernel> _matmul_native_kernel;
+    std::unique_ptr<kernels::ClMatMulLowpNativeKernel> _matmul_lowp_native_kernel;
+
     experimental::MemoryRequirements _aux_mem{};
 
     TensorInfo _flattened_src{};
     TensorInfo _converted_weights{};
     TensorInfo _reshaped_weights{};
 
+    // Saved tensor shapes for reshaping when using matmul
+    TensorShape _lhs_shape_original{};
+    TensorInfo _lhs_to_use{};
+
     TensorInfo _weights_to_use{};
     int _weights_to_use_idx{ ACL_SRC_1 };
 
@@ -134,10 +146,11 @@ private:
     bool _is_quantized{ false };
     bool _is_prepared{ false };
     bool _dynamic_weights{ false };
+    bool _use_matmul{ false };
 
 #ifdef ARM_COMPUTE_ASSERTS_ENABLED
-    int _asrt_run_count{};
-    int _asrt_prepare_count{};
+    int _asrt_run_count {};
+    int _asrt_prepare_count{};
 #endif // ARM_COMPUTE_ASSERTS_ENABLED
 };
 } // namespace opencl
```