From 51847d5dd9cad6bc81673642a01fd531def44311 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 19 Oct 2021 15:45:57 +0100 Subject: Implement CLDirectConv3DKernel - uint8/int8 Resolve COMPMID-4663 Signed-off-by: Giorgio Arena Change-Id: I5c3c1cffed5385c06b789543318f7f4d6096987e Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6468 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Sheri Zhang --- arm_compute/runtime/CL/functions/CLConv3D.h | 2 + docs/user_guide/operator_list.dox | 2 + .../CL/cl_kernels/nhwc/direct_convolution3d.cl | 52 +++++++-- src/gpu/cl/kernels/ClDirectConv3dKernel.cpp | 52 ++++++++- src/gpu/cl/kernels/ClDirectConv3dKernel.h | 2 + src/gpu/cl/operators/ClDirectConv3d.h | 2 + tests/validation/CL/Convolution3D.cpp | 122 ++++++++++++++++++++- .../fixtures/DirectConvolution3DFixture.h | 51 ++++++--- tests/validation/reference/Conv3D.cpp | 110 ++++++++++++++++--- tests/validation/reference/Conv3D.h | 4 +- 10 files changed, 353 insertions(+), 46 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLConv3D.h b/arm_compute/runtime/CL/functions/CLConv3D.h index 241481b8ba..5728fe79d8 100644 --- a/arm_compute/runtime/CL/functions/CLConv3D.h +++ b/arm_compute/runtime/CL/functions/CLConv3D.h @@ -65,6 +65,8 @@ public: * |:--------------|:--------------|:------|:--------------| * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] compile_context The compile context to be used. * @param[in] src Source tensor. 4 lower dimensions represent a single src [IFM, width, height, depth], diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox index 1d06a394a9..55bfe38f55 100644 --- a/docs/user_guide/operator_list.dox +++ b/docs/user_guide/operator_list.dox @@ -629,6 +629,8 @@ where N = batches, C = channels, H = height, W = width, D = depth src0src1src2dst F16F16F16F16 F32F32F32F32 + QASYMM8QASYMM8S32QASYMM8 + QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED Copy diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl index d11be5bbb3..587f3984ab 100644 --- a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl +++ b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl @@ -29,7 +29,7 @@ /** OpenCL kernel to compute the direct convolution 3d. * * @note Data layout supported: NDHWC - * @note Data type supported: F32/F16 + * @note Data type supported: F32/F16/QASYMM8/QASYMM8_SIGNED * @note The accumulation data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE_PROMOTED=half) * @note The convolution padding (left, top and front) must be passed at compile time using -DPAD_LEFT, -DPAD_TOP and -DPAD_FRONT (e.g. -DPAD_LEFT=2, -DPAD_TOP=2, -DPAD_FRONT=2) * @note The convolution strides must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y and -DSTRIDE_Z (e.g. -DSTRIDE_X=2, -DSTRIDE_Y=2, -DSTRIDE_Z=2) @@ -44,12 +44,22 @@ * @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2) * @note The number of K0 inner accumulations must be passed at compile time using -DK0 (e.g. -DK0=2) * @note The size of the partial store block in x must be passed at compile time using -DPARTIAL_N0 (e.g. -DPARTIAL_N0=1) + * @note The zero value must be passed at compile time using -DZERO_VALUE (e.g. -DZERO_VALUE=0) * @note Only the following configurations of M0, N0 and K0 are currently supported: * - M0 = 1, 2, 3, 4, 5, .... n * - N0 = 2, 3, 4, 8, 16 * - K0 = 2, 3, 4, 8, 16 * - * @note If biases are used then -DHAS_BIAS has to be passed at compile time + * @note In case of QASYMM8/QASYMM8_SIGNED, the following extra information must be passed at compile time: + * - -DIS_QUANTIZED + * - The destination quantization multiplier e.g. -DDST_MULTIPLIER=1234 + * - The destination quantization shift e.g. -DDST_SHIFT=4 + * - The destination offset e.g. -DDST_OFFSET=4 + * - The source offset e.g. -DSRC_OFFSET=4 + * - The weights offset e.g. -DWEI_OFFSET=4 + * - The quantized zero value e.g. -DZERO_VALUE=4 + * + * @note If biases are used then -DHAS_BIAS has to be passed at compile time along with its tensor type by using -DBIA_DATA_TYPE (e.g. -DBIA_DATA_TYPE=int). * * @param[in] src_ptr Pointer to the source tensor. Supported data type: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -110,6 +120,13 @@ __kernel void direct_convolution3d_ndhwc( #define _IDST_CHANNELS DST_CHANNELS #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT * _IWEI_DEPTH) + // If quantized, the output tile has to be quantized first before being stored to global memory +#if defined(IS_QUANTIZED) +#define _IOUTPUT_TILE cq +#else // defined(IS_QUANTIZED) +#define _IOUTPUT_TILE c +#endif // defined(IS_QUANTIZED) + const int cout = GET_SPATIAL_IDX(0, N0, PARTIAL_N0); // OFM const int mout = GET_SPATIAL_IDX(1, M0, 0); // WIDTH x HEIGHT x DEPTH const int bout = GET_SPATIAL_IDX(2, 1, 0); // BATCH SIZE IDX @@ -153,7 +170,7 @@ __kernel void direct_convolution3d_ndhwc( LOOP_UNROLLING(int, i, 0, 1, M0, { - a[i].v = (DATA_TYPE)0; + a[i].v = ZERO_VALUE; }) // Load tile from the src tensor @@ -175,6 +192,10 @@ __kernel void direct_convolution3d_ndhwc( // Compute the matrix multiplication between two tiles T_MMUL(DATA_TYPE, DATA_TYPE, ACC_DATA_TYPE, M0, N0, K0, NT, T, a, b, c); + // Apply the offset correction (correction usually needed for asymmetric quantized computation) + // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero + T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, K0, SRC_OFFSET, WEI_OFFSET, a, b, c); + ck += K0; } @@ -187,7 +208,7 @@ __kernel void direct_convolution3d_ndhwc( LOOP_UNROLLING(int, i, 0, 1, M0, { - a[i].v = (DATA_TYPE)0; + a[i].v = ZERO_VALUE; }) // Load tile from the src tensor @@ -206,22 +227,30 @@ __kernel void direct_convolution3d_ndhwc( // // Compute the matrix multiplication between two tiles T_MMUL(DATA_TYPE, DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a, b, c); + // Apply the offset correction (operation usually needed for asymmetric quantized computation) + // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero + T_OFFSET_CORRECTION(ACC_DATA_TYPE, M0, N0, 1, SRC_OFFSET, WEI_OFFSET, a, b, c); + ++ck; } #endif // ((_ISRC_CHANNELS % K0) != 0) } + // Offset correction required for the quantized asymmetric computation + // The computation is not performed if both SRC_OFFSET and WEI_OFFSET are zero + T_ADD_CONSTANT(ACC_DATA_TYPE, M0, N0, c, (_IWEI_WIDTH * _IWEI_HEIGHT * _IWEI_DEPTH * _ISRC_CHANNELS * SRC_OFFSET * WEI_OFFSET), c); + #if defined(HAS_BIAS) - TILE(DATA_TYPE, 1, N0, bias0); + TILE(BIA_DATA_TYPE, 1, N0, bias0); if((cout + N0) <= _IDST_CHANNELS) { - bias0[0].v = VLOAD(N0)(0, (__global DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout * sizeof(DATA_TYPE))); + bias0[0].v = VLOAD(N0)(0, (__global BIA_DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout * sizeof(BIA_DATA_TYPE))); } else { VLOAD_PARTIAL(N0, PARTIAL_N0) - (bias0[0].v, 0, (__global DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout * sizeof(DATA_TYPE))); + (bias0[0].v, 0, (__global BIA_DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout * sizeof(BIA_DATA_TYPE))); } // c = c + bias[broadcasted] @@ -238,8 +267,15 @@ __kernel void direct_convolution3d_ndhwc( dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH *_IDST_HEIGHT * _IDST_DEPTH); }) +#if defined(IS_QUANTIZED) + TILE(DATA_TYPE, M0, N0, cq); + + // Quantize the tile + T_QUANTIZE8_ASYMMETRIC(ACC_DATA_TYPE, DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, c, cq); +#endif // defined(IS_QUANTIZED) + bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; // Store the tile in reverse order so the invalid values are overwritten with the valid ones - T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, M0, N0, PARTIAL_N0, BUFFER, dst, cout, dst_stride_y, x_cond, c, dst_indirect_y); + T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, M0, N0, PARTIAL_N0, BUFFER, dst, cout, dst_stride_y, x_cond, _IOUTPUT_TILE, dst_indirect_y); } \ No newline at end of file diff --git a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp index 88e73dc72a..27afb7e190 100644 --- a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" @@ -44,7 +45,7 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv3d_info.act_info.enabled(), "Fused activation not supported"); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src0, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src0, 1, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src0, src1); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1->dimension(1) != src0->dimension(0), "Weights feature map dimension should match the respective src's one"); @@ -56,7 +57,14 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons if(src2 != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2); + if(is_data_type_quantized(src0->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src2, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2); + } ARM_COMPUTE_RETURN_ERROR_ON_MSG(src2->dimension(0) != src1->dimension(0), "Biases size and number of dst feature maps should match"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src2->num_dimensions() > 1, "Biases should be one dimensional"); } @@ -114,7 +122,6 @@ void ClDirectConv3dKernel::configure(const CLCompileContext &compile_context, co CLBuildOptions build_options; build_options.add_option("-cl-fast-relaxed-math"); build_options.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); - build_options.add_option("-DACC_DATA_TYPE=float"); build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src_width)); build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src_height)); build_options.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(src_depth)); @@ -136,7 +143,44 @@ void ClDirectConv3dKernel::configure(const CLCompileContext &compile_context, co build_options.add_option("-DM0=" + support::cpp11::to_string(m0)); build_options.add_option("-DK0=" + support::cpp11::to_string(k0)); build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - build_options.add_option_if(src2 != nullptr, std::string("-DHAS_BIAS")); + + if(src2 != nullptr) + { + build_options.add_option(std::string("-DHAS_BIAS")); + build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(src2->data_type()))); + } + + if(is_data_type_quantized(data_type)) + { + const UniformQuantizationInfo iqinfo = src0->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = src1->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform(); + + PixelValue zero_value = PixelValue(0, src0->data_type(), src0->quantization_info()); + int zero_value_s32; + zero_value.get(zero_value_s32); + + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; + int output_multiplier = 0; + int output_shift = 0; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + build_options.add_option("-DIS_QUANTIZED"); + build_options.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); + build_options.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift)); + build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(-iqinfo.offset)); + build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(-wqinfo.offset)); + build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset)); + build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32)); + build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32)); + } + else + { + build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::F32)); + build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0)); + build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0)); + build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0)); + build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0)); + } std::string kernel_name = "direct_convolution3d_ndhwc"; _kernel = create_kernel(compile_context, kernel_name, build_options.options()); diff --git a/src/gpu/cl/kernels/ClDirectConv3dKernel.h b/src/gpu/cl/kernels/ClDirectConv3dKernel.h index 485c900826..de4f0ce216 100644 --- a/src/gpu/cl/kernels/ClDirectConv3dKernel.h +++ b/src/gpu/cl/kernels/ClDirectConv3dKernel.h @@ -59,6 +59,8 @@ public: * |:--------------|:--------------|:------|:--------------| * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] compile_context The compile context to be used. * @param[in] src0 Source tensor. 4 lower dimensions represent a single src [IFM, width, height, depth], diff --git a/src/gpu/cl/operators/ClDirectConv3d.h b/src/gpu/cl/operators/ClDirectConv3d.h index d8ffefc450..fa58b5aedd 100644 --- a/src/gpu/cl/operators/ClDirectConv3d.h +++ b/src/gpu/cl/operators/ClDirectConv3d.h @@ -55,6 +55,8 @@ public: * |:--------------|:--------------|:------|:--------------| * |F16 |F16 |F16 |F16 | * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] compile_context The compile context to be used. * @param[in] src0 Source tensor. 4 lower dimensions represent a single src [IFM, width, height, depth], diff --git a/tests/validation/CL/Convolution3D.cpp b/tests/validation/CL/Convolution3D.cpp index 75e2e99b03..381aacc465 100644 --- a/tests/validation/CL/Convolution3D.cpp +++ b/tests/validation/CL/Convolution3D.cpp @@ -38,10 +38,11 @@ namespace validation { namespace { -RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ -RelativeTolerance tolerance_fp32(0.05f); /**< Tolerance for floating point tests */ -constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ -constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance tolerance_fp32(0.05f); /**< Tolerance for floating point tests */ +constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance for quantized tests */ +constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ } // namespace TEST_SUITE(CL) @@ -165,6 +166,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi template using CLDirectConvolution3DFixture = DirectConvolution3DValidationFixture; +template +using CLDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture; TEST_SUITE(NDHWC) TEST_SUITE(FP16) @@ -266,6 +269,117 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolution3DFixture, framework: // clang-format on // *INDENT-ON* TEST_SUITE_END() // FP32 + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolution3DQuantizedFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(400U, 400U, 200U, 11U) }), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("StrideZ", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("PadZ", { 1 })), + framework::dataset::make("KernelWidth", { 9 })), + framework::dataset::make("KernelHeight", { 9 })), + framework::dataset::make("KernelDepth", { 9 })), + framework::dataset::make("NumKernels", { 300 })), + framework::dataset::make("HasBias", { true })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolution3DQuantizedFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(400U, 400U, 200U, 11U) }), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("StrideZ", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("PadZ", { 1 })), + framework::dataset::make("KernelWidth", { 9 })), + framework::dataset::make("KernelHeight", { 9 })), + framework::dataset::make("KernelDepth", { 9 })), + framework::dataset::make("NumKernels", { 300 })), + framework::dataset::make("HasBias", { true })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8_SIGNED + TEST_SUITE_END() // NDHWC TEST_SUITE_END() // DirectConvolution3D TEST_SUITE_END() // CL diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h index 3a675ac6d3..2250dcaeb0 100644 --- a/tests/validation/fixtures/DirectConvolution3DFixture.h +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -40,19 +40,23 @@ template ::value || std::is_same::value, int32_t, T >::type; + template void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, - unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout) + unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout, + const QuantizationInfo &src_qinfo = QuantizationInfo(), const QuantizationInfo &weights_qinfo = QuantizationInfo(), const QuantizationInfo &dst_qinfo = QuantizationInfo()) { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NDHWC); const TensorShape weights_shape(num_kernels, input_shape[0], kernel_width, kernel_height, kernel_depth); const TensorShape bias_shape(num_kernels); + const DataType bias_data_type = is_data_type_quantized(data_type) ? DataType::S32 : data_type; const Conv3dInfo conv3d_info(Size3D(stride_x, stride_y, stride_z), Padding3D(pad_x, pad_y, pad_z), act_info, Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false); const TensorShape output_shape = compute_conv3d_shape(input_shape, weights_shape, conv3d_info); - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, data_layout); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, data_layout, src_qinfo, weights_qinfo, dst_qinfo); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, src_qinfo, weights_qinfo, dst_qinfo); } protected: @@ -79,13 +83,14 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const Conv3dInfo &conv3d_info, - bool has_bias, const DataType &data_type, const DataLayout &data_layout) + bool has_bias, const DataType &data_type, const DataType &bias_data_type, const DataLayout &data_layout, const QuantizationInfo &src_qinfo, + const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo) { // Create tensors - TensorType src = create_tensor(input_shape, data_type, 1, QuantizationInfo(), data_layout); - TensorType weights = create_tensor(weights_shape, data_type, 1, QuantizationInfo(), data_layout); - TensorType bias = has_bias ? create_tensor(bias_shape, data_type, 1, QuantizationInfo()) : TensorType(); - TensorType dst = create_tensor(output_shape, data_type, 1, QuantizationInfo(), data_layout); + TensorType src = create_tensor(input_shape, data_type, 1, src_qinfo, data_layout); + TensorType weights = create_tensor(weights_shape, data_type, 1, weights_qinfo, data_layout); + TensorType bias = has_bias ? create_tensor(bias_shape, bias_data_type, 1, QuantizationInfo()) : TensorType(); + TensorType dst = create_tensor(output_shape, data_type, 1, dst_qinfo, data_layout); // Create and configure function FunctionType conv{}; @@ -122,14 +127,15 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const Conv3dInfo &conv3d_info, - bool has_bias, const DataType &data_type) + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, + const Conv3dInfo &conv3d_info, bool has_bias, const DataType &data_type, const DataType &bias_data_type, const QuantizationInfo &src_qinfo, + const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo) { // Create reference - SimpleTensor src{ input_shape, data_type }; - SimpleTensor weights{ weights_shape, data_type }; - SimpleTensor bias{ bias_shape, data_type }; - SimpleTensor dst{ output_shape, data_type }; + SimpleTensor src{ input_shape, data_type, 1, src_qinfo }; + SimpleTensor weights{ weights_shape, data_type, 1, weights_qinfo }; + SimpleTensor bias{ bias_shape, bias_data_type }; + SimpleTensor dst{ output_shape, data_type, 1, dst_qinfo }; // Fill reference fill(src, 0); @@ -140,7 +146,7 @@ protected: fill(bias, 2); } - return reference::activation_layer(reference::conv3d(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); + return reference::activation_layer(reference::conv3d(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); } TensorType _target{}; @@ -159,6 +165,21 @@ public: kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout); } }; + +template +class DirectConvolution3DValidationQuantizedFixture : public DirectConvolution3DValidationGenericFixture +{ +public: + template + void setup(TensorShape input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, + unsigned int num_kernels, bool has_bias, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout, QuantizationInfo src_qinfo, QuantizationInfo weights_qinfo, + QuantizationInfo dst_qinfo) + { + DirectConvolution3DValidationGenericFixture::setup(input_shape, stride_x, stride_y, stride_z, pad_x, pad_y, pad_z, kernel_width, kernel_height, + kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout, src_qinfo, + weights_qinfo, dst_qinfo); + } +}; } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp index ad61105b36..706059d1cb 100644 --- a/tests/validation/reference/Conv3D.cpp +++ b/tests/validation/reference/Conv3D.cpp @@ -22,7 +22,11 @@ * SOFTWARE. */ #include "Conv3D.h" + #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "support/Requires.h" +#include "tests/validation/reference/UtilsQuantizedAsymm.h" // Source/Destination Tensor shape indices (N D H W C) constexpr unsigned int batch_dim = 4u; @@ -52,11 +56,14 @@ inline bool is_valid_pixel(int i, int min, int max) { return (i >= min && i < max); } + // Evaluate the weights against an element in a given tensor. -template -T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, const Size3D &dilation, int batch, - int z_start, int y_start, int x_start, int ch_out) +template < typename T, typename TB, typename std::enable_if < validation::is_floating_point::value &&validation::is_floating_point::value, int >::type = 0 > +T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) { + ARM_COMPUTE_UNUSED(oq_info); + const unsigned int weights_width = weights.shape()[weights_width_dim]; const unsigned int weights_height = weights.shape()[weights_height_dim]; const unsigned int weights_depth = weights.shape()[weights_depth_dim]; @@ -101,12 +108,89 @@ T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, c } } } - return total; + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + return total + bias_value; } + +template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same::value || std::is_same::value) > +T calculate_conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) +{ + const unsigned int weights_width = weights.shape()[weights_width_dim]; + const unsigned int weights_height = weights.shape()[weights_height_dim]; + const unsigned int weights_depth = weights.shape()[weights_depth_dim]; + + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int src_width = src.shape()[width_dim]; + const unsigned int src_height = src.shape()[height_dim]; + const unsigned int src_depth = src.shape()[depth_dim]; + + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + + const int input_offset = -iq_info.offset; + const float input_scale = iq_info.scale; + int weights_offset = -wq_info.offset; + float weights_scale = wq_info.scale; + const int output_offset = oq_info.offset; + const float output_scale = oq_info.scale; + + int output_multiplier = 0; + int output_shift = 0; + const float multiplier = input_scale * weights_scale / output_scale; + arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + + int32_t total(0); + for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) + { + const int idx_z = z_start + dilation.depth * weight_d; + for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y) + { + const int idx_y = y_start + dilation.height * weight_y; + for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x) + { + const int idx_x = x_start + dilation.width * weight_x; + + //Check if the point is within padding + const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width); + const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height); + const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth); + const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid); + if(is_invalid_pixel) + { + continue; + } + + for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in) + { + const T *in_ptr = src.data(); + const T *w_ptr = weights.data(); + + const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch }); + const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d }); + T input_value = in_ptr[in_offset]; + T weight_value = w_ptr[weight_offset]; + total += ((input_value + input_offset) * (weight_value + weights_offset)); + } + } + } + } + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + total += bias_value; + + return validation::quantize_down_scale_by_fixedpoint(total, output_multiplier, output_shift, output_offset, + std::numeric_limits::lowest(), std::numeric_limits::max()); } +} // namespace -template -SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info) +template +SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info) { // Compute reference const unsigned int batch_size = src.shape()[batch_dim]; @@ -150,14 +234,10 @@ SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weight const int x_start = (x_out * stride_x) - pad_left; for(unsigned int ch_out = 0; ch_out < dst_channels; ++ch_out) { - T weighted_value = calculate_conv3d(src, weights, conv3d_info.dilation, batch, z_start, - y_start, x_start, ch_out); - T *out_ptr = dst.data(); - const T *b_ptr = bias.data(); - T bias_value(0); + T *out_ptr = dst.data(); + const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); - bias_value = b_ptr[ch_out]; - out_ptr[out_offset] = weighted_value + bias_value; + out_ptr[out_offset] = calculate_conv3d(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); } } } @@ -170,6 +250,10 @@ template SimpleTensor conv3d(const SimpleTensor &src, const Simple const Conv3dInfo &conv3d_info); template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, + const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h index ade8a2c242..e3674f4bfb 100644 --- a/tests/validation/reference/Conv3D.h +++ b/tests/validation/reference/Conv3D.h @@ -37,8 +37,8 @@ namespace validation { namespace reference { -template -SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, +template +SimpleTensor conv3d(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation -- cgit v1.2.1