aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLSoftmaxLayer.cpp
diff options
context:
space:
mode:
author: Chunosov <N.Chunosov@yandex.ru> 2017-11-06 22:09:45 +0700
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: d6afedc775220f17317f1835a4d18b72a54525de (patch)
tree: 54aed8322a4a286ba376d74bbee61c85a588cc9b /src/runtime/CL/functions/CLSoftmaxLayer.cpp
parent: 6ff12a0f7765f62b8d0fa8554021e1cac2789f19 (diff)
download: ComputeLibrary-d6afedc775220f17317f1835a4d18b72a54525de.tar.gz
COMPMID-661: softmax-fp32 optimisation (#14)
Change-Id: I2007af1ed9dcf68065cf412aa50f73a2025b31a6
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94605
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLSoftmaxLayer.cpp')
-rw-r--r-- src/runtime/CL/functions/CLSoftmaxLayer.cpp | 36
1 files changed, 30 insertions, 6 deletions
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index fa324ee61d..7268d8eab5 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -23,15 +23,19 @@
*/
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
using namespace arm_compute;
CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp()
+ : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _max_shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp(), _run_legacy_path(false)
{
}
@@ -48,14 +52,26 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output, float
_max.allocator()->init(tensor_info_max_sum);
_sum.allocator()->init(tensor_info_max_sum);
+ // Set GPU target to kernels
+ _max_shift_exp_sum_kernel.set_target(CLScheduler::get().target());
+
// Manage intermediate buffers
_memory_group.manage(&_tmp);
_memory_group.manage(&_max);
_memory_group.manage(&_sum);
- // Configure Kernels
- _max_kernel.configure(input, &_max);
- _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
+ // Configure kernels
+ // TODO (COMPMID-661): Remove legacy path once the new one is properly validated
+ _run_legacy_path = is_data_type_quantized_assymetric(input->info()->data_type());
+ if(_run_legacy_path)
+ {
+ _max_kernel.configure(input, &_max);
+ _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
+ }
+ else
+ {
+ _max_shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
+ }
_norm_kernel.configure(&_tmp, &_sum, output);
// Allocate intermediate buffers
@@ -68,8 +84,16 @@ void CLSoftmaxLayer::run()
{
_memory_group.acquire();
- CLScheduler::get().enqueue(_max_kernel, false);
- CLScheduler::get().enqueue(_shift_exp_sum_kernel, false);
+ // Force to use the new fused kernel
+ if(_run_legacy_path)
+ {
+ CLScheduler::get().enqueue(_max_kernel, false);
+ CLScheduler::get().enqueue(_shift_exp_sum_kernel, false);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false);
+ }
CLScheduler::get().enqueue(_norm_kernel);
_memory_group.release();