about | summary | refs | log | tree | commit | diff
path: root/src/gpu/cl/operators/ClMatMul.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu/cl/operators/ClMatMul.cpp')
-rw-r--r-- src/gpu/cl/operators/ClMatMul.cpp | 62
1 files changed, 47 insertions, 15 deletions
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 15833216bb..3822c16aa1 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -22,8 +22,11 @@
* SOFTWARE.
*/
#include "src/gpu/cl/operators/ClMatMul.h"
+
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+
#include "src/common/utils/Log.h"
#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
@@ -37,45 +40,74 @@ namespace arm_compute
namespace opencl
{
using namespace arm_compute::opencl::kernels;
+
ClMatMul::ClMatMul()
- : _native_matmul_kernel(std::make_unique<ClMatMulNativeKernel>())
+ : _matmul_native_kernel(std::make_unique<ClMatMulNativeKernel>()),
+ _matmul_lowp_native_kernel(std::make_unique<ClMatMulLowpNativeKernel>())
{
}
-ClMatMul::~ClMatMul()
-{
-}
-Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
+
+Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &matmul_info)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+
const GPUTarget gpu_target = CLScheduler::get().target();
std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
- return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
+ bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
+
+ return is_quantized ? ClMatMulLowpNativeKernel::validate(lhs, rhs, dst, kernel_info) :
+ ClMatMulNativeKernel::validate(lhs, rhs, dst, kernel_info);
}
-void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
+
+void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, const MatMulInfo &matmul_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, output);
- ARM_COMPUTE_LOG_PARAMS(lhs, rhs, output, matmul_info);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
+ ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst, matmul_info);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, dst, matmul_info));
+
+ _is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
+
const GPUTarget gpu_target = CLScheduler::get().target();
std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
- // Set the target for the kernels
- _native_matmul_kernel->set_target(gpu_target);
+ if(_is_quantized)
+ {
+ _matmul_lowp_native_kernel->set_target(gpu_target);
- // Configure the native matrix multiply kernel
- _native_matmul_kernel->configure(compile_context, lhs, rhs, output, kernel_info);
+ // Configure the low-precision native matrix multiply kernel
+ _matmul_lowp_native_kernel->configure(compile_context, lhs, rhs, dst, kernel_info);
+ }
+ else
+ {
+ _matmul_native_kernel->set_target(gpu_target);
+
+ // Configure the native matrix multiply kernel
+ _matmul_native_kernel->configure(compile_context, lhs, rhs, dst, kernel_info);
+ }
}
+
void ClMatMul::run(ITensorPack &tensors)
{
- CLScheduler::get().enqueue_op(*_native_matmul_kernel, tensors, true);
+ if(_is_quantized)
+ {
+ CLScheduler::get().enqueue_op(*_matmul_lowp_native_kernel, tensors, true);
+ }
+ else
+ {
+ CLScheduler::get().enqueue_op(*_matmul_native_kernel, tensors, true);
+ }
}
+
} // namespace opencl
} // namespace arm_compute