-rw-r--r--  arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h  |  6
-rw-r--r--  arm_compute/runtime/CL/functions/CLSoftmaxLayer.h   |  8
-rw-r--r--  arm_compute/runtime/NEON/functions/NESoftmaxLayer.h |  4
-rw-r--r--  docs/00_introduction.dox                            |  1
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer.cl             |  3
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer_quantized.cl   | 13
-rw-r--r--  src/core/CL/kernels/CLSoftmaxLayerKernel.cpp        |  1
-rw-r--r--  src/core/NEON/kernels/NESoftmaxLayerKernel.cpp      |  8
-rw-r--r--  tests/validation/CL/LogSoftmaxLayer.cpp             | 58
-rw-r--r--  tests/validation/reference/SoftmaxLayer.cpp         |  2
10 files changed, 30 insertions, 74 deletions
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index b174f493b5..bd544e1537 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -203,7 +203,7 @@ public:
     CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: S32/F16/F32
+     * @param[in]  input  Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
      * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
      * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
      * @param[in]  info   Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
@@ -212,7 +212,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: S32/F16/F32
+     * @param[in]  input           Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
      * @param[in]  sum             Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
      * @param[out] output          Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
      * @param[in]  info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
@@ -220,7 +220,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
      *
-     * @param[in] input  Source tensor. Data types supported: S32/F16/F32
+     * @param[in] input  Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
      * @param[in] sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
      * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
      * @param[in] info   Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
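For readers skimming the patch, the log-domain form that the corrected formulas in the next two headers describe can be written as a small scalar routine. This is only an illustrative sketch of the math, not the library's kernel code, and it follows the Softmax formula's grouping in which beta scales the max-subtracted value:

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable log softmax:
// out = (x - max(x)) * beta - log(sum(exp((x - max(x)) * beta)))
std::vector<float> log_softmax_ref(const std::vector<float> &x, float beta = 1.0f)
{
    const float        x_max = *std::max_element(x.begin(), x.end());
    std::vector<float> shifted(x.size());
    float              sum = 0.0f;
    for(size_t i = 0; i < x.size(); ++i)
    {
        shifted[i] = (x[i] - x_max) * beta; // shift by the row maximum before exponentiating
        sum += std::exp(shifted[i]);
    }
    const float log_sum = std::log(sum);
    for(float &v : shifted)
    {
        v -= log_sum; // subtract log(sum) instead of dividing by sum
    }
    return shifted;
}

Subtracting the row maximum before exponentiating keeps exp() from overflowing without changing the result, which is why both formulas are written in that shifted form.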
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index f0ef15acd7..40b6743031 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -44,7 +44,7 @@ class ICLTensor;
  * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
  *
  * Log Softmax is calculated by :
- * @f[ out = (x - max(x) * beta) - \sum{e^{x - max(x) * beta}} @f]
+ * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
  *
  * This function runs the following kernels:
  * -# @ref CLLogits1DMaxKernel
@@ -63,7 +63,7 @@ public:
     CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[out] output          Destination tensor. Data types supported: same as @p input
      * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in]  reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
@@ -75,7 +75,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[out] output          Destination tensor. Data types supported: same as @p input
      * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in]  reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
@@ -86,7 +86,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
      *
-     * @param[in] input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[in] input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[in] output          Destination tensor. Data types supported: same as @p input
      * @param[in] beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 51d981de44..fc1316d33c 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -39,10 +39,10 @@ class ITensor;
 /** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
  *
  * Softmax is calculated by :
- * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f]
+ * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
  *
  * Log Softmax is calculated by :
- * @f[ out = (x - max(x)) - \sum{e^{x - max(x)}} @f]
+ * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
  *
  * This function runs the following kernels:
  * -# @ref NEFillBorderKernel
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 85146cb15d..cfd232d8e4 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -246,6 +246,7 @@ v20.08 Public major release
     "axis" has been renamed to "reduce_end_axis", which is the last axis (inclusive) before which all dimensions are reduced/collapsed.
     The default "axis" (now "reduce_end_axis") value for @ref NESoftmaxLayer and @ref NELogSoftmaxLayer is changed from -1 to 0.
     The default "axis" (now "reduce_end_axis") value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer is changed from 1 to 0.
+   - The support for quantized data types has been removed from @ref CLLogSoftmaxLayer due to implementation complexity.
 - Added new data type QASYMM8_SIGNED support for:
   - @ref CLArgMinMaxLayer
   - @ref CLArgMinMaxLayerKernel
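The beta parameter that appears throughout the header documentation above is a temperature-like scaling of the exponent: values above 1 sharpen the distribution, values below 1 flatten it. A tiny standalone illustration with hypothetical values, not library code:

#include <cmath>
#include <cstdio>

int main()
{
    const float x[3]     = { 1.0f, 2.0f, 3.0f };
    const float betas[2] = { 1.0f, 2.0f };
    for(float beta : betas)
    {
        float e[3], sum = 0.0f;
        for(int i = 0; i < 3; ++i)
        {
            e[i] = std::exp((x[i] - 3.0f) * beta); // 3.0f is max(x)
            sum += e[i];
        }
        // beta = 1 gives roughly {0.09, 0.24, 0.67}; beta = 2 sharpens this to roughly {0.02, 0.12, 0.87}
        std::printf("beta=%.1f: %.2f %.2f %.2f\n", beta, e[0] / sum, e[1] / sum, e[2] / sum);
    }
    return 0;
}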
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index 767cf4c4f7..0c9f8c1c66 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -112,6 +112,7 @@ __kernel void softmax_layer_norm(
     VEC_DATA_TYPE(DATA_TYPE, 16)
     data = vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0));
 #ifdef LOG_SOFTMAX
+    sum_val = log(sum_val);
     vstore16(SUB_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
 #else /* LOG_SOFTMAX */
     vstore16(DIV_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 5d35e50b1f..81e7b896d5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -570,14 +570,12 @@ __kernel void softmax_layer_norm_quantized(
     int sum_val = *((__global int *)offset(&sum, 0, get_global_id(1)));

     // It will be better to calculate this in prev layer and pass here as parameter
-#ifndef LOG_SOFTMAX
     uint  sum_val_u               = convert_uint(sum_val);
     int   headroom_plus_one       = clz(sum_val_u);
     int   num_bits_over_unit      = EXP_ACCUMULATION_INT_BITS - headroom_plus_one;
     int   shifted_sum_minus_one_1 = convert_int((sum_val_u << headroom_plus_one) - (1u << 31));
     int16 shifted_sum_minus_one   = shifted_sum_minus_one_1;
     int16 shifted_scale           = ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(shifted_sum_minus_one, 16);
-#endif /* LOG_SOFTMAX */

     // It was already calculated in prev layer, should be stored into tmp output and reused
     int16 data_diff = vload16(0, (__global int *)offset(&src, 0, 0));
@@ -589,18 +587,13 @@
     }
 #endif /* defined(INPUT_BETA_MULTIPLIER) && defined(INPUT_BETA_LEFT_SHIFT) */

-#ifdef LOG_SOFTMAX
-    long16 data = SUB_OP(convert_long16(data_diff_mult), (long16)(sum_val), long, 16);
-    data        = select(0L, data, convert_long16(data_diff) >= (long16)(DIFF_MIN));
-#else /* LOG_SOFTMAX */
     int16 data = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 16);
     data = ASYMM_MULT(shifted_scale, data, 16);
     data = ASYMM_ROUNDING_DIVIDE_BY_POW2(data, num_bits_over_unit + 31 - 8, 16);
 #ifdef QASYMM8_SIGNED
-    data = ADD_OP(data, (int16)(MIN_VALUE), int, 16);
+    data = ADD_OP(data, (int16)(MIN_VALUE), int, 16);
 #endif /* QASYMM8_SIGNED */
-    data = select(MIN_VALUE, data, data_diff >= (int16)(DIFF_MIN));
-#endif /* LOG_SOFTMAX */
+    data = select(MIN_VALUE, data, data_diff >= (int16)(DIFF_MIN));

     vstore16(CONVERT_SAT(data, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
 }
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 09deb94a85..85d70b04d0 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -129,6 +129,7 @@ Status validate_arguments_1DNorm(const ITensorInfo *input, const ITensorInfo *su
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
+    ARM_COMPUTE_RETURN_ERROR_ON(info.is_log && !is_data_type_float(info.input_data_type));

     // Note: output should always have a scale of 1/256 and offset 0
     const QuantizationInfo allowed_quantization_info = get_softmax_output_quantization_info(info.input_data_type, info.is_log);
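With the check added to validate_arguments_1DNorm above, a quantized log softmax configuration should now be rejected up front rather than silently producing wrong numbers. Below is a hedged sketch of how a caller might observe this; the validate() signature and the TensorInfo/QuantizationInfo constructors are taken from the documentation in this patch and from my reading of the public API, so treat the exact calls as assumptions rather than something verified against this revision:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

using namespace arm_compute;

int main()
{
    // QASYMM8 in/out remains valid for CLSoftmaxLayer, but is expected to fail for CLLogSoftmaxLayer after this change.
    const TensorInfo input(TensorShape(32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, -10));
    const TensorInfo output(TensorShape(32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 256, 0)); // scale 1/256, offset 0 as noted above

    const Status log_status = CLLogSoftmaxLayer::validate(&input, &output, 1.0f /* beta */, 0 /* reduce_end_axis */);
    // Expectation after this patch: static_cast<bool>(log_status) is false and error_description() explains why.
    return static_cast<bool>(log_status) ? 0 : 1;
}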
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 41bf03ad1d..35e5973aff 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -368,6 +368,10 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
         {
             sum_inversed = 256.f / sum;
         }
+        else
+        {
+            sum = std::log(sum);
+        }
     }

     /* Normalize exponentials */
@@ -516,6 +520,10 @@ void logits_1d_softmax_float(const ITensor &in, const ITensor &max, void *const
         {
             sum_inversed = T(1) / sum;
         }
+        else
+        {
+            sum = static_cast<T>(std::log(sum));
+        }
     }

     /* Normalize exponentials */
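The NEON change above keeps the reduction pass unchanged and only alters what is carried into the final pass: the reciprocal of the sum for softmax, or log(sum) for log softmax. Schematically, in plain scalar C++ rather than the vectorized kernel code (variable names are mine):

#include <cmath>

// 'scaled' holds (x - max(x)) * beta for one row; 'sum' holds the accumulated sum of exp(scaled).
void normalize_row(float *scaled, int len, float sum, bool is_log)
{
    if(!is_log)
    {
        const float sum_inversed = 1.0f / sum;
        for(int i = 0; i < len; ++i)
        {
            scaled[i] = std::exp(scaled[i]) * sum_inversed; // softmax: divide by the sum
        }
    }
    else
    {
        const float log_sum = std::log(sum); // mirrors the added 'sum = std::log(sum)' branch
        for(int i = 0; i < len; ++i)
        {
            scaled[i] -= log_sum; // log softmax: subtract log(sum) instead of dividing
        }
    }
}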
diff --git a/tests/validation/CL/LogSoftmaxLayer.cpp b/tests/validation/CL/LogSoftmaxLayer.cpp
index 39d2483ab8..420e6b2fc1 100644
--- a/tests/validation/CL/LogSoftmaxLayer.cpp
+++ b/tests/validation/CL/LogSoftmaxLayer.cpp
@@ -46,17 +46,6 @@ namespace
 /** Tolerance for float operations */
 RelativeTolerance<half>  tolerance_f16(half(0.2));
 RelativeTolerance<float> tolerance_f32(0.001f);
-
-/** Tolerance for quantized operations */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
-
-/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
-{
-    DataType::QASYMM8,
-    DataType::F16,
-    DataType::F32,
-});
 } // namespace

 TEST_SUITE(CL)
@@ -91,7 +80,7 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture<half>, framework::Dataset
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16

 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
@@ -118,47 +107,10 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture<float>, framework::Datase
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-template <typename T>
-using CLLogSoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<CLTensor, CLAccessor, CLLogSoftmaxLayer, T, true>;
-
-TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
-                       framework::dataset::make("Beta", { 1.0f, 2.f }))),
-                       framework::dataset::make("ReduceEndAxis", { 0, 1 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
-                       framework::dataset::make("Beta", { 1.0f, 2.0f }))),
-                       framework::dataset::make("ReduceEndAxis", { 0 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
-                       framework::dataset::make("Beta", { 1.0f, 2.0f }))),
-                       framework::dataset::make("ReduceEndAxis", { 0, 1, 2 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // LogSoftmaxLayer
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index 9a8d46d516..e295f73f52 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -85,7 +85,7 @@ SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, in
     {
         if(is_log)
         {
-            return val - sum;
+            return val - static_cast<T>(std::log(sum));
        }
         else
         {
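The reference fix at the end restores the identity the float tests rely on: exponentiating the log softmax output reproduces the softmax output. A quick self-contained check of that property, illustrative only and not part of the test suite:

#include <cassert>
#include <cmath>

int main()
{
    const float x[4]  = { 0.5f, -1.0f, 2.0f, 0.0f };
    const float x_max = 2.0f;

    float shifted[4];
    float sum = 0.0f;
    for(int i = 0; i < 4; ++i)
    {
        shifted[i] = x[i] - x_max;
        sum += std::exp(shifted[i]);
    }

    for(int i = 0; i < 4; ++i)
    {
        const float softmax_val     = std::exp(shifted[i]) / sum; // divide in the linear domain
        const float log_softmax_val = shifted[i] - std::log(sum); // subtract log(sum), as in the fixed reference
        assert(std::fabs(std::exp(log_softmax_val) - softmax_val) < 1e-6f);
    }
    return 0;
}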