diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-07-10 16:05:21 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | 097967568f9363d06df3ac21403edcab57de39d7 (patch) | |
tree | 1e731ebc741076efd230d88d08eac99fa4284b6f | |
parent | dcdc85ef876e854749db58ecd60c37f64a627536 (diff) | |
download | ComputeLibrary-097967568f9363d06df3ac21403edcab57de39d7.tar.gz |
COMPMID-429: Port CLSoftmaxLayer to QS16.
Change-Id: I3a0394364629654747439372d32f692b6ca29ee0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80219
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
-rw-r--r-- | arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 6 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 2 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/fixed_point.h | 12 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/softmax_layer.cl | 6 | ||||
-rw-r--r-- | src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 6 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLSoftmaxLayer.cpp | 2 | ||||
-rw-r--r-- | tests/validation/CL/SoftmaxLayer.cpp | 61 |
7 files changed, 70 insertions, 25 deletions
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h index 024169a7c5..137b60b25f 100644 --- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -36,7 +36,7 @@ class CLLogits1DMaxKernel : public ICLSimple2DKernel public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32 + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32 * @param[out] output Destination tensor. Data types supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); @@ -58,7 +58,7 @@ public: CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32 + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32 * @param[in] max Max values tensor. Data types supported: same as @p input * @param[out] output Destination tensor. Data types supported: same as @p input * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input @@ -91,7 +91,7 @@ public: CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32 + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32 * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input * @param[out] output Destination tensor. Data types supported: same as @p input */ diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index a5b19bd5e1..18f7a02a3e 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -49,7 +49,7 @@ public: CLSoftmaxLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32 + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32 * @param[out] output Destination tensor. Data types supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h index bf126a2fdf..9fd3a6f899 100644 --- a/src/core/CL/cl_kernels/fixed_point.h +++ b/src/core/CL/cl_kernels/fixed_point.h @@ -116,6 +116,11 @@ MAXQ_IMPL(qs8x2) MAXQ_IMPL(qs8x4) MAXQ_IMPL(qs8x8) MAXQ_IMPL(qs8x16) +MAXQ_IMPL(qs16x1) +MAXQ_IMPL(qs16x2) +MAXQ_IMPL(qs16x4) +MAXQ_IMPL(qs16x8) +MAXQ_IMPL(qs16x16) #define MAX_OP_EXPAND_STR(a, b, type, size) max_##type##x##size((a), (b)) #define MAX_OP_EXPAND(a, b, type, size) MAX_OP_EXPAND_STR(a, b, type, size) @@ -163,6 +168,11 @@ SUBQ_SAT_IMPL(qs8x2) SUBQ_SAT_IMPL(qs8x4) SUBQ_SAT_IMPL(qs8x8) SUBQ_SAT_IMPL(qs8x16) +SUBQ_SAT_IMPL(qs16x1) +SUBQ_SAT_IMPL(qs16x2) +SUBQ_SAT_IMPL(qs16x4) +SUBQ_SAT_IMPL(qs16x8) +SUBQ_SAT_IMPL(qs16x16) #define SUB_SAT_OP_EXPAND_STR(a, b, type, size) sub_sat_##type##x##size((a), (b)) #define SUB_SAT_OP_EXPAND(a, b, type, size) SUB_SAT_OP_EXPAND_STR(a, b, type, size) @@ -270,6 +280,7 @@ MLALQ_SAT_IMPL(qs16x8, qs32x8) } DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16) +DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16) #define DIV_SAT_OP_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position)) #define DIV_SAT_OP_EXPAND(a, b, type, size, position) DIV_SAT_OP_EXPAND_STR(a, b, type, size, position) @@ -304,6 +315,7 @@ DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16) } EXPQ_IMPL(qs8, qs8x16, 16) +EXPQ_IMPL(qs16, qs16x16, 16) #define EXP_OP_EXPAND_STR(a, type, size, position) exp_sat_##type##x##size((a), (position)) #define EXP_OP_EXPAND(a, type, size, position) EXP_OP_EXPAND_STR(a, type, size, position) diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl index 113fc762a6..04736c4d9a 100644 --- a/src/core/CL/cl_kernels/softmax_layer.cl +++ b/src/core/CL/cl_kernels/softmax_layer.cl @@ -64,7 +64,7 @@ __constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4 * @note In case the input is not multiple of 16 -DNON_MULTIPLE_OF_16 must be passed. * - * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/F16/F32 + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -125,7 +125,7 @@ __kernel void softmax_layer_max( * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4 * @note In case the input is not multiple of 16 -DNON_MULTIPLE_OF_16 must be passed. * - * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/F16/F32 + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -210,7 +210,7 @@ __kernel void softmax_layer_shift_exp_sum( * @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4 * - * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/F16/F32 + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index 3bca938513..ccaf7453d1 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -41,7 +41,7 @@ using namespace arm_compute; void CLLogits1DMaxKernel::configure(const ICLTensor *input, ICLTensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); // Softmax across the x dimension @@ -103,7 +103,7 @@ CLLogits1DShiftExpSumKernel::CLLogits1DShiftExpSumKernel() void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); // Output auto initialization if not yet initialized @@ -187,7 +187,7 @@ CLLogits1DNormKernel::CLLogits1DNormKernel() void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output); // Output auto initialization if not yet initialized diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 0f20af093b..850eb2c6f8 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -36,7 +36,7 @@ CLSoftmaxLayer::CLSoftmaxLayer() void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); // Create intermediate tensors shapes _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp index 43efc04d36..2680070e50 100644 --- a/tests/validation/CL/SoftmaxLayer.cpp +++ b/tests/validation/CL/SoftmaxLayer.cpp @@ -49,10 +49,9 @@ using namespace arm_compute::test::validation; namespace { -/** Tolerance for float operations */ -const float tolerance = 0.000001f; -/** Tolerance for fixed point operations */ -const float tolerance_fixed_point = 2.f; +const float tolerance = 0.000001f; /** Tolerance for float operations */ +const float tolerance_qs8 = 2.f; /** Tolerance for QS8 fixed point operations */ +const float tolerance_qs16 = 2.f; /** Tolerance for QS16 fixed point operations */ /** Compute OpenCL softmax layer function. * @@ -160,35 +159,69 @@ BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * CNNFloatDataTypes(), shape, dt) BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_AUTO_TEST_SUITE(QS8) +// Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) -BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(1, 6), - shape, dt, fixed_point_position) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::xrange(1, 6), + shape, fixed_point_position) { // Compute function - CLTensor dst = compute_softmax_layer(shape, dt, fixed_point_position); + CLTensor dst = compute_softmax_layer(shape, DataType::QS8, fixed_point_position); // Compute reference - RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, dt, fixed_point_position); + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, DataType::QS8, fixed_point_position); // Validate output - validate(CLAccessor(dst), ref_dst, tolerance_fixed_point); + validate(CLAccessor(dst), ref_dst, tolerance_qs8); } BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) -BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(1, 6), - shape, dt, fixed_point_position) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::xrange(1, 6), + shape, fixed_point_position) { // Compute function - CLTensor dst = compute_softmax_layer(shape, dt, fixed_point_position); + CLTensor dst = compute_softmax_layer(shape, DataType::QS8, fixed_point_position); // Compute reference - RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, dt, fixed_point_position); + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, DataType::QS8, fixed_point_position); // Validate output - validate(CLAccessor(dst), ref_dst, tolerance_fixed_point); + validate(CLAccessor(dst), ref_dst, tolerance_qs8); } BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE(QS16) +// Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14 +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::xrange(1, 14), + shape, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_softmax_layer(shape, DataType::QS16, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, DataType::QS16, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs16); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::xrange(1, 14), + shape, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_softmax_layer(shape, DataType::QS16, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, DataType::QS16, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs16); +} +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END() #endif /* DOXYGEN_SKIP_THIS */ |