diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-10-19 15:45:57 +0100 |
---|---|---|
committer | Sheri Zhang <sheri.zhang@arm.com> | 2021-10-20 15:54:24 +0000 |
commit | 51847d5dd9cad6bc81673642a01fd531def44311 (patch) | |
tree | 1c5b79334d054141308c1e03e05749ac33ed5d34 /src/gpu/cl | |
parent | 7a8cf1707e45ea011e0db5c0b3091c381ccd387f (diff) | |
download | ComputeLibrary-51847d5dd9cad6bc81673642a01fd531def44311.tar.gz |
Implement CLDirectConv3DKernel - uint8/int8
Resolve COMPMID-4663
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: I5c3c1cffed5385c06b789543318f7f4d6096987e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6468
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Diffstat (limited to 'src/gpu/cl')
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv3dKernel.cpp | 52 | ||||
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv3dKernel.h | 2 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClDirectConv3d.h | 2 |
3 files changed, 52 insertions, 4 deletions
diff --git a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp index 88e73dc72a..27afb7e190 100644 --- a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" @@ -44,7 +45,7 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv3d_info.act_info.enabled(), "Fused activation not supported"); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src0, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src0, 1, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, src1); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1->dimension(1) != src0->dimension(0), "Weights feature map dimension should match the respective src's one"); @@ -56,7 +57,14 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons if(src2 != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2); + if(is_data_type_quantized(src0->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src2, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2); + } ARM_COMPUTE_RETURN_ERROR_ON_MSG(src2->dimension(0) != src1->dimension(0), "Biases size and number of dst feature maps should match"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src2->num_dimensions() > 1, "Biases should be one dimensional"); } @@ -114,7 +122,6 @@ void ClDirectConv3dKernel::configure(const CLCompileContext &compile_context, co CLBuildOptions build_options; build_options.add_option("-cl-fast-relaxed-math"); build_options.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); - build_options.add_option("-DACC_DATA_TYPE=float"); build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src_width)); build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src_height)); build_options.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(src_depth)); @@ -136,7 +143,44 @@ void ClDirectConv3dKernel::configure(const CLCompileContext &compile_context, co build_options.add_option("-DM0=" + support::cpp11::to_string(m0)); build_options.add_option("-DK0=" + support::cpp11::to_string(k0)); build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - build_options.add_option_if(src2 != nullptr, std::string("-DHAS_BIAS")); + + if(src2 != nullptr) + { + build_options.add_option(std::string("-DHAS_BIAS")); + build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(src2->data_type()))); + } + + if(is_data_type_quantized(data_type)) + { + const UniformQuantizationInfo iqinfo = src0->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = src1->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform(); + + PixelValue zero_value = PixelValue(0, src0->data_type(), src0->quantization_info()); + int zero_value_s32; + zero_value.get(zero_value_s32); + + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; + int output_multiplier = 0; + int output_shift = 0; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + build_options.add_option("-DIS_QUANTIZED"); + build_options.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); + build_options.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift)); + build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(-iqinfo.offset)); + build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(-wqinfo.offset)); + build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset)); + build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32)); + build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32)); + } + else + { + build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::F32)); + build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0)); + build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0)); + build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0)); + build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0)); + } std::string kernel_name = "direct_convolution3d_ndhwc"; _kernel = create_kernel(compile_context, kernel_name, build_options.options()); diff --git a/src/gpu/cl/kernels/ClDirectConv3dKernel.h b/src/gpu/cl/kernels/ClDirectConv3dKernel.h index 485c900826..de4f0ce216 100644 --- a/src/gpu/cl/kernels/ClDirectConv3dKernel.h +++ b/src/gpu/cl/kernels/ClDirectConv3dKernel.h @@ -59,6 +59,8 @@ public: * |:--------------|:--------------|:------|:--------------| * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] compile_context The compile context to be used. * @param[in] src0 Source tensor. 4 lower dimensions represent a single src [IFM, width, height, depth], diff --git a/src/gpu/cl/operators/ClDirectConv3d.h b/src/gpu/cl/operators/ClDirectConv3d.h index d8ffefc450..fa58b5aedd 100644 --- a/src/gpu/cl/operators/ClDirectConv3d.h +++ b/src/gpu/cl/operators/ClDirectConv3d.h @@ -55,6 +55,8 @@ public: * |:--------------|:--------------|:------|:--------------| * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] compile_context The compile context to be used. * @param[in] src0 Source tensor. 4 lower dimensions represent a single src [IFM, width, height, depth], |