diff options
author | SiCongLi <sicong.li@arm.com> | 2021-10-24 19:12:33 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2021-11-02 10:41:11 +0000 |
commit | afa19725f7f3feb2c21a6aed02ade49d08e3097b (patch) | |
tree | d796239f542cf447060e6fa6d1240fecb3d6c7a6 /src/gpu/cl/operators/ClGemm.cpp | |
parent | 579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (diff) | |
download | ComputeLibrary-afa19725f7f3feb2c21a6aed02ade49d08e3097b.tar.gz |
Add post ops to ClGemmMatrixMultiplyReshapedOnlyRHSKernel and ClGemmMatrixMultiplyNativeKernel Part 3
Partially resolves: COMPMID-4435
Change-Id: Ifc5affa3a24a70942ca2d001380205df09b03ad7
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6550
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClGemm.cpp')
-rw-r--r-- | src/gpu/cl/operators/ClGemm.cpp | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp index e05256ee2f..50ecb214e3 100644 --- a/src/gpu/cl/operators/ClGemm.cpp +++ b/src/gpu/cl/operators/ClGemm.cpp @@ -204,7 +204,6 @@ ClGemm::ClGemm() void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) { - ARM_COMPUTE_ERROR_ON_MSG(gemm_info.post_ops().size() > 0, "PostOps are not supported in this kernel"); DataType data_type = a->data_type(); bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); @@ -223,6 +222,7 @@ void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorIn kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); + kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _mm_native_kernel->set_target(gpu_target); @@ -281,7 +281,6 @@ void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensor void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) { - ARM_COMPUTE_ERROR_ON_MSG(gemm_info.post_ops().size() > 0, "PostOps are not supported in this kernel"); DataType data_type = a->data_type(); bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); @@ -300,6 +299,7 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); + kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _mm_reshaped_only_rhs_kernel->set_target(gpu_target); @@ -334,7 +334,6 @@ Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_UNUSED(output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.post_ops().size() > 0, "PostOps are not supported in this kernel"); // Get the GPU target const GPUTarget gpu_target = CLScheduler::get().target(); @@ -355,6 +354,7 @@ Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); + kernel_info.post_ops = gemm_info.post_ops(); auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); @@ -418,7 +418,6 @@ Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf { ARM_COMPUTE_UNUSED(alpha); ARM_COMPUTE_UNUSED(output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.post_ops().size() > 0, "PostOps are not supported in this kernel"); TensorInfo tmp_b_info{}; @@ -441,6 +440,7 @@ Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); + kernel_info.post_ops = gemm_info.post_ops(); GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -562,10 +562,9 @@ Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso void ClGemm::run(ITensorPack &tensors) { - const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0); - const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1); - const ITensor *src2 = tensors.get_const_tensor(ACL_SRC_2); - ITensor *dst = tensors.get_tensor(ACL_DST); + const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0); + const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1); + ITensor *dst = tensors.get_tensor(ACL_DST); ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst); @@ -620,7 +619,10 @@ void ClGemm::run(ITensorPack &tensors) const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom; bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0); - ITensorPack gemm_reshaped_onlyrhs_pack{ { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } }; + // Copy original tensor pack and overwrite rhs with reshaped counterpart + ITensorPack gemm_reshaped_onlyrhs_pack(tensors); + gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get()); + if(has_pad_y) { CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true); |