From 71cbd28b7cf5115b0451d43e5c84cce4ae4d8ec7 Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Wed, 3 Nov 2021 12:17:06 +0000 Subject: Fix out-of-bound reads in cl gemm kernels * Revert "Remove padding in FP Cl Gemm kernels" This reverts commit 48717a3d38fef8d316cd4b9fd9a3bc1a43db736b. * Allow different boundary row handling strategies across native, reshaped and reshaped_only_rhs kernels by introducing an ELTWISE_OPERAND_ROW parameter to the macro Resolves COMPMID-4919 Change-Id: Icefc23c0760a6abb838fef1d0d5bda06b07c79e3 Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6569 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice --- .../kernels/ClGemmMatrixMultiplyNativeKernel.cpp | 46 +++++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'src/gpu/cl/kernels') diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp index 7ad3d55fe0..c3efc24fa9 100644 --- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp @@ -134,12 +134,15 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src0, ITens const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info, ElementsProcessed &num_elements_processed) { - ARM_COMPUTE_UNUSED(src0, src1, src2); unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; + Window win{}; + Window win_out{}; + bool window_changed = false; + // In case both input and dst have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. 
if(reinterpret_input_as_3d == reinterpret_output_as_3d) @@ -147,6 +150,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src0, ITens reinterpret_output_as_3d = false; } + // dst tensor auto initialization if not yet initialized + auto_init_if_empty(*dst, src0->clone()->set_tensor_shape(misc::shape_calculator::compute_mm_shape(*src0, *src1, gemm_info))); + TensorInfo tmp_info(*dst); if(reinterpret_output_as_3d) @@ -162,14 +168,44 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src0, ITens num_elems_processed_per_iteration_x = rhs_info.n0; num_elems_processed_per_iteration_y = lhs_info.m0; - Window win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + win = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + win_out = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowStatic src0_access(src0, 0, 0, + src0->dimension(0), + src0->dimension(1)); + AccessWindowStatic src1_access(src1, 0, 0, + ceil_to_multiple(src1->dimension(0), num_elems_processed_per_iteration_x), + src1->dimension(1)); + AccessWindowStatic dst_access(dst, 0, 0, + dst->dimension(0), + dst->dimension(1)); + + if(src2 != nullptr) + { + const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; + + AccessWindowStatic src2_access(src2, 0, 0, + ceil_to_multiple(src2->dimension(0), bias_processed_per_iteration_x), + src2->dimension(1)); + + window_changed = update_window_and_padding(win, src0_access, src1_access, src2_access) || // window used by the execute_window_loop + update_window_and_padding(win_out, dst_access); // window used to update the padding requirements of dst tensor + } + else + { + window_changed = update_window_and_padding(win, src0_access, src1_access) || // window used by the execute_window_loop + update_window_and_padding(win_out, dst_access); // 
window used to update the padding requirements of dst tensor + } // Collapse along the Z direction // This collapse needs to be here in order to tune the Z dimension of LWS + Window collapsed = win; const unsigned int dimension_to_collapse = std::min(static_cast<unsigned int>(dst->num_dimensions()), 2u); - Window collapsed = win.collapse(win, dimension_to_collapse); + collapsed = win.collapse(win, dimension_to_collapse); - return std::make_pair(Status{}, collapsed); + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, collapsed); } } // namespace @@ -190,7 +226,7 @@ void ClGemmMatrixMultiplyNativeKernel::configure(const CLCompileContext &compile ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src0, src1, src2, dst, alpha, beta, lhs_info, rhs_info, gemm_info)); - auto padding_info = get_padding_info({ src0, src1, src2, dst }); + auto padding_info = get_padding_info({ src0, dst }); _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d; _reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; _use_dummy_work_items = preferred_dummy_work_items_support(CLKernelLibrary::get().get_device()); -- cgit v1.2.1