From 3a50166ff71f8379682fe6ece2a94b7a4bb3daa3 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Fri, 26 Jun 2020 10:02:06 +0100 Subject: COMPMID-3338 COMPMID-3336 COMPMID-3584 COMPMID-3338 Remove store padding in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel COMPMID-3336 Remove store padding in CLGEMMMatrixMultiplyNativeKernel COMPMID-3584 Fix VSTORE to correctly deal with scalar case * Implement STORE_BLOCK_BOUNDARY_AWARE, as part of the COMPMID-3332 investigation, with the following substantial changes: - Separate STORE_BLOCK_PARTIAL, STORE_ROW_PARTIAL and VSTORE_PARTIAL so that this change does not affect kernels not using STORE_BLOCK_BOUNDARY_AWARE. - Revamp vstore_ext_n to vstore_partial_n, and enhance VSTORE_PARTIAL to correctly handle both vector and scalar cases * Remove the store padding (dst tensor) in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel and CLGEMMMatrixMultiplyNativeKernel * Add configuration tests to check no padding is added by the configuration. Signed-off-by: SiCong Li Change-Id: I4f0907867979d8dacedd03b4bcbd2fb19e4f1602 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3522 Comments-Addressed: Arm Jenkins Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index 152430fd2e..8b4f930ea4 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -185,15 +185,13 @@ std::pair validate_and_configure_window(ITensorInfo *input0, ITe input1->dimension(0), input1->dimension(1)); AccessWindowStatic output_access(output, 0, 0, - ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x), - output->dimension(1) + bottom_pad); + output->dimension(0), + output->dimension(1)); if(input2 != nullptr) { - const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; - - const int bias_processed_per_iteration_y = gemm_info.broadcast_bias ? 1 : num_elems_processed_per_iteration_y; - + const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x; + const int bias_processed_per_iteration_y = gemm_info.broadcast_bias ? 1 : num_elems_processed_per_iteration_y; AccessWindowStatic input2_access(input2, 0, 0, ceil_to_multiple(input2->dimension(0), bias_processed_per_iteration_x), ceil_to_multiple(input2->dimension(1), bias_processed_per_iteration_y)); @@ -281,6 +279,10 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext const unsigned int h_gemm_3d = _reinterpret_output_as_3d ? output->info()->dimension(1) : input0->info()->dimension(1); const unsigned int d_gemm_3d = _reinterpret_output_as_3d ? output->info()->dimension(2) : input0->info()->dimension(2); + // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. + const unsigned int partial_store_m0 = internal_m % lhs_info.m0; + const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0; + // Create build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type())); @@ -304,6 +306,8 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0)); build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0)); build_opts.add_option("-DH0=" + support::cpp11::to_string(rhs_info.h0)); + build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0)); + build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0)); build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(gemm_info.activation_info.activation()))); build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.a())); build_opts.add_option_if(gemm_info.activation_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(gemm_info.activation_info.b())); -- cgit v1.2.1