Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLActivationLayerKernel.cpp       |  3
-rw-r--r-- | src/core/NEON/kernels/NESoftmaxLayerKernel.cpp        | 26
-rw-r--r-- | src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp |  5
-rw-r--r-- | src/runtime/NEON/functions/NESoftmaxLayer.cpp         |  4
4 files changed, 24 insertions, 14 deletions
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 8172aafca9..c097b5ff70 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -68,8 +68,7 @@ std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITens
     {
         ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
         // Output auto inizialitation if not yet initialized
-        auto_init_if_empty(*output,
-                           *input->clone());
+        auto_init_if_empty(*output, *input);
     }

     const unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index f1027590e4..a8a0f59a41 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -251,8 +251,10 @@ void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info)

 namespace
 {
-void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));

@@ -313,8 +315,10 @@ void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
-void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));

@@ -375,7 +379,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor
 }

 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
@@ -410,6 +414,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
         {
             float16x8_t vec_elements = vld1q_f16(in_ptr);
             vec_elements = vsubq_f16(vec_elements, vec_max);
+            vec_elements = vmulq_n_f16(vec_elements, beta);
             vec_elements = vexpq_f16(vec_elements);

             vst1q_f16(exp_ptr, vec_elements);
@@ -426,7 +431,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
         // Run remaining elements
         for(int i = 0; i < small_steps; ++i)
         {
-            const float16_t element = std::exp(static_cast<float>(in_ptr[i] - *max_ptr));
+            const float16_t element = std::exp(static_cast<float>(in_ptr[i] - *max_ptr) * beta);
             exp_ptr[i] = element;
             sum += element;
         }
@@ -436,7 +441,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
 }
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

-void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
@@ -471,6 +476,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
         {
             float32x4_t vec_elements = vld1q_f32(in_ptr);
             vec_elements = vsubq_f32(vec_elements, vec_max);
+            vec_elements = vmulq_n_f32(vec_elements, beta);
             vec_elements = vexpq_f32(vec_elements);

             vst1q_f32(exp_ptr, vec_elements);
@@ -488,7 +494,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
         // Run remaining elements
         for(int i = 0; i < small_steps; ++i)
         {
-            float element = std::exp(in_ptr[i] - *max_ptr);
+            float element = std::exp((in_ptr[i] - *max_ptr) * beta);
             exp_ptr[i] = element;
             sum += element;
         }
@@ -500,14 +506,15 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
 }
 } //namespace
 NELogits1DShiftExpSumKernel::NELogits1DShiftExpSumKernel()
-    : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr)
+    : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr), _beta(1.0f)
 {
 }

-void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum)
+void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
+    ARM_COMPUTE_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->info()->data_type()));

     // Output auto initialization if not yet initialized
     auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -545,6 +552,7 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor
     _max    = max;
     _output = output;
     _sum    = sum;
+    _beta   = beta;

     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
@@ -568,7 +576,7 @@ void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &in
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
     ARM_COMPUTE_ERROR_ON(_func == nullptr);

-    (*_func)(_input, _max, _output, _sum, window);
+    (*_func)(_input, _max, _output, _sum, window, _beta);
 }

 namespace
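Note: the NEON kernel changes above implement a scaled softmax, softmax(x)_i = exp(beta * (x_i - max_j x_j)) / sum_k exp(beta * (x_k - max_j x_j)). beta multiplies the max-shifted logits before exponentiation, both in the vectorized paths (vmulq_n_f16 / vmulq_n_f32) and in the scalar tail loops. The fixed-point paths (QS8/QS16) accept but ignore beta, and configure() rejects beta != 1.0f for fixed-point inputs. A minimal scalar sketch of what the patched f32 pass computes over one 1-D slice (function and variable names are illustrative, not the library's API; assumes a non-empty input):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Shift each element by the row maximum (for numerical stability),
    // scale by beta, exponentiate, and accumulate the running sum.
    float shift_exp_sum_f32(const std::vector<float> &in, std::vector<float> &out, float beta)
    {
        const float max_val = *std::max_element(in.begin(), in.end()); // what the max kernel provides
        float       sum     = 0.f;
        out.resize(in.size());
        for(std::size_t i = 0; i < in.size(); ++i)
        {
            out[i] = std::exp((in[i] - max_val) * beta); // same expression as the patched scalar tail loop
            sum += out[i];
        }
        return sum; // the normalization kernel (_norm_kernel) later divides each out[i] by this sum
    }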
window_max(window); window_max.set(Window::DimX, Window::Dimension(0, 0, 0)); @@ -471,6 +476,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor { float32x4_t vec_elements = vld1q_f32(in_ptr); vec_elements = vsubq_f32(vec_elements, vec_max); + vec_elements = vmulq_n_f32(vec_elements, beta); vec_elements = vexpq_f32(vec_elements); vst1q_f32(exp_ptr, vec_elements); @@ -488,7 +494,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor // Run remaining elements for(int i = 0; i < small_steps; ++i) { - float element = std::exp(in_ptr[i] - *max_ptr); + float element = std::exp((in_ptr[i] - *max_ptr) * beta); exp_ptr[i] = element; sum += element; } @@ -500,14 +506,15 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor } //namespace NELogits1DShiftExpSumKernel::NELogits1DShiftExpSumKernel() - : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr) + : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr), _beta(1.0f) { } -void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum) +void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); + ARM_COMPUTE_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->info()->data_type())); // Output auto initialization if not yet initialized auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); @@ -545,6 +552,7 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor _max = max; _output = output; _sum = sum; + _beta = beta; // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); @@ -568,7 +576,7 @@ void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &in ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _max, _output, _sum, window); + (*_func)(_input, _max, _output, _sum, window, _beta); } namespace diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index 1db927c8ff..34464ff057 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -34,9 +34,12 @@ GCSoftmaxLayer::GCSoftmaxLayer() { } -void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output) +void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float beta) { + ARM_COMPUTE_UNUSED(beta); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON(beta != 1.0f); // Create intermediate tensors shapes _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index cc5d4e91c3..84ecfdaf33 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -36,7 +36,7 @@ 
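Note: the GLES_COMPUTE backend only gains the new signature here, not the behaviour: configure() rejects any beta other than 1.0f. The ARM_COMPUTE_UNUSED(beta) call is there because the error-check macros can compile away in release builds, which would otherwise leave beta unreferenced and trigger an unused-parameter warning. A generic sketch of the same pattern (macro and function names hypothetical):

    #include <cassert>

    // In release builds (NDEBUG) assert() expands to nothing, so without
    // the cast 'beta' would be unused and warned about.
    #define MY_UNUSED(x) static_cast<void>(x)

    void configure_stub(float beta)
    {
        MY_UNUSED(beta);
        assert(beta == 1.0f && "beta scaling is not supported by this backend");
    }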
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index cc5d4e91c3..84ecfdaf33 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -36,7 +36,7 @@ NESoftmaxLayer::NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
 {
 }

-void NESoftmaxLayer::configure(ITensor *input, ITensor *output)
+void NESoftmaxLayer::configure(ITensor *input, ITensor *output, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);

@@ -57,7 +57,7 @@ void NESoftmaxLayer::configure(ITensor *input, ITensor *output)

     // Configure Kernels
     _max_kernel.configure(input, &_max);
-    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
     _norm_kernel.configure(&_tmp, &_sum, output);
     _fill_border_kernel.configure(input, _max_kernel.border_size(), BorderMode::REPLICATE);
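Note: with these changes a caller can pass a temperature-style scaling factor to the NEON softmax. A minimal usage sketch, assuming the public headers at this revision (the matching header change is not shown above, since the diffstat is limited to 'src'; the kernel constructor initializes _beta to 1.0f, so an unscaled softmax presumably remains the default):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor input{}, output{};
        input.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
        // 'output' can stay uninitialized; configure() auto-initializes it.

        NESoftmaxLayer softmax;
        softmax.configure(&input, &output, 0.5f); // beta = 0.5f; fixed-point inputs require beta == 1.0f

        input.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill 'input' with logits, then:
        softmax.run();
        return 0;
    }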