From 28287afbea9549e8e2904084ae895c04cca88e95 Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Tue, 25 Feb 2020 14:13:54 +0000 Subject: COMPMID-2792: Add support for QASYMM8_SIGNED in CLGEMMLowpMatrixMultiplyReshapedKernel Signed-off-by: Sheri Zhang Change-Id: I005e604253394f31173f37ec0296caf76b5e697c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/227008 Tested-by: bsgcomp Reviewed-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2798 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/CL/cl_kernels/gemmlowp.cl | 12 ++++++------ .../CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/core/CL') diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 239e039e10..8140b8f2a5 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -199,7 +199,7 @@ #define VECTOR_TYPE VEC_DATA_TYPE(DATA_TYPE, NUM_ELEMS_PROCESSED_PER_THREAD_X) #define VECTOR_ACC_TYPE VEC_DATA_TYPE(ACC_DATA_TYPE, NUM_ELEMS_PROCESSED_PER_THREAD_X) #define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X) -/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in case both matrices have not beed reshaped +/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in case both matrices have not been reshaped * * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A * @@ -481,7 +481,7 @@ __kernel void gemmlowp_mm_midgard(IMAGE_DECLARATION(src0), #endif // defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && defined(COLS_A) #if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && defined(N) -/** This OpenCL kernel computes the matrix multiplication between 2 matrices with QASYMM data type. +/** This OpenCL kernel computes the matrix multiplication between 2 matrices with QASYMM/QASYMM_SIGNED data type. * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 must be NOT transposed * The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the K0xN0 must be transposed * @@ -507,7 +507,7 @@ __kernel void gemmlowp_mm_midgard(IMAGE_DECLARATION(src0), * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped * - * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data type: QASYMM8 + * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data type: QASYMM8/QASYMM_SIGNED * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension (in bytes) * @param[in] lhs_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension (in bytes) @@ -519,7 +519,7 @@ __kernel void gemmlowp_mm_midgard(IMAGE_DECLARATION(src0), * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension (in bytes) * @param[in] rhs_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped matrix - * @param[out] dst_ptr Pointer to the destination matrix Supported data type: same as @p lhs_ptr + * @param[out] dst_ptr Pointer to the destination matrix Supported data type: S32 * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) @@ -583,10 +583,10 @@ __kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), #endif // defined(DUMMY_WORK_ITEMS) // Compute LHS matrix address - __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes + (y % V0) * (uint)LHS_OFFSET_X + (y / V0) * (uint)lhs_stride_y + (z * lhs_stride_z); + __global DATA_TYPE *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes + (y % V0) * (uint)LHS_OFFSET_X + (y / V0) * (uint)lhs_stride_y + (z * lhs_stride_z); // Compute RHS matrix address - __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X + (x / (uint)H0) * rhs_stride_y; + __global DATA_TYPE *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X + (x / (uint)H0) * rhs_stride_y; #if defined(MATRIX_B_DEPTH) // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3 diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp index 8d3aff6603..8dfeb773fe 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4, "The number of dimensions for the LHS matrix must be <= 4"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the RHS matrix must be <= 3"); @@ -309,4 +309,4 @@ void CLGEMMLowpMatrixMultiplyReshapedKernel::run(const Window &window, cl::Comma } while(window.slide_window_slice_3D(slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute -- cgit v1.2.1