diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2017-07-03 17:41:47 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | 8a383694445dfebb84732b19d5b3299961e8ffe3 (patch) | |
tree | 09f7521ec6112e7eab12ca2ea74cfbe59ea7d636 /src/core/CL/kernels | |
parent | bdb6b0bb156588dc39fd5084d4c91d05b5148610 (diff) | |
download | ComputeLibrary-8a383694445dfebb84732b19d5b3299961e8ffe3.tar.gz |
COMPMID-434 - Port CLGEMM to support 16 bit fixed point
Change-Id: I30aef3c7ecd1ee740c2a7f2ce65a63c7dcd66e49
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79630
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
4 files changed, 31 insertions, 8 deletions
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index 3850c4d2cd..5b6e0ec6af 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -43,7 +43,8 @@ CLGEMMInterleave4x4Kernel::CLGEMMInterleave4x4Kernel()
 
 void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, DataType::F16,
+                                                  DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
 
     TensorShape output_shape = input->info()->tensor_shape();
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index 5883dd698b..d1cdd7dc61 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -43,7 +43,7 @@ CLGEMMMatrixAdditionKernel::CLGEMMMatrixAdditionKernel()
 
 void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *output, float beta)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
@@ -53,8 +53,19 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou
     const unsigned int num_elems_processed_per_iteration = max_cl_vector_width / data_size_from_type(input->info()->data_type());
 
     std::ostringstream ma_arguments;
-    ma_arguments << "-DBETA=" << (input->info()->data_type() == DataType::QS8 ? scvt_qs8_f32(beta, input->info()->fixed_point_position()) : beta) << " ";
-    ma_arguments << "-DFIXED_POINT_POSITION=" << input->info()->fixed_point_position();
+    if(is_data_type_fixed_point(input->info()->data_type()))
+    {
+        ma_arguments << "-DBETA=" << (input->info()->data_type() == DataType::QS8 ?
+                                     scvt_qs8_f32(beta, input->info()->fixed_point_position()) :
+                                     scvt_qs16_f32(beta, input->info()->fixed_point_position()))
+                     << " ";
+        ma_arguments << "-DFIXED_POINT_POSITION=" << input->info()->fixed_point_position();
+    }
+    else
+    {
+        ma_arguments << "-DBETA=" << beta;
+    }
+
     std::set<std::string> build_opts;
     build_opts.emplace(ma_arguments.str());
 
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 7c5b3d7866..2d6b83a97d 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -50,7 +50,7 @@ CLGEMMMatrixMultiplyKernel::CLGEMMMatrixMultiplyKernel()
 
 void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
 
@@ -74,8 +74,18 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
 
     std::ostringstream mm_arguments;
     mm_arguments << "-DWIDTH_MATRIX_B=" << input1->info()->dimension(0) << " ";
-    mm_arguments << "-DALPHA=" << (input0->info()->data_type() == DataType::QS8 ? scvt_qs8_f32(alpha, input0->info()->fixed_point_position()) : alpha) << " ";
-    mm_arguments << "-DFIXED_POINT_POSITION=" << input0->info()->fixed_point_position() << " ";
+    if(is_data_type_fixed_point(input0->info()->data_type()))
+    {
+        mm_arguments << "-DALPHA=" << (input0->info()->data_type() == DataType::QS8 ?
+                                      scvt_qs8_f32(alpha, input0->info()->fixed_point_position()) :
+                                      scvt_qs16_f32(alpha, input0->info()->fixed_point_position()))
+                     << " ";
+        mm_arguments << "-DFIXED_POINT_POSITION=" << input0->info()->fixed_point_position() << " ";
+    }
+    else
+    {
+        mm_arguments << "-DALPHA=" << alpha << " ";
+    }
     std::set<std::string> build_opts;
 
     // Check if the output tensor is a vector. If so,the kernel runs the vector-matrix multiplication
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index ecef7e1774..73c8429055 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -40,7 +40,8 @@ using namespace arm_compute;
 
 void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, DataType::F16,
+                                                  DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
 
     TensorShape output_shape{ input->info()->tensor_shape() };