From 840a72cc745c60eccbd26fe192b035ec68b2ee41 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park <sang-hoon.park@arm.com>
Date: Wed, 23 Sep 2020 13:24:13 +0100
Subject: COMPMID-3773: Fix the accumulation of projection in QLSTM

The "output_state_in" (previous output state) tensor
is used for accumulation of projection.

The argument for the tensor given to configure() has
to be changed to non-const since CLTensor needs
to be non-const for map() function call for data copying.

Even though NEON-side doesn't need the same change,
it has been done for consistency.

Change-Id: Ifba0ab6dc8260c468e9f087bf51824daefbab7a3
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4018
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 arm_compute/runtime/CL/functions/CLQLSTMLayer.h   |  4 ++--
 arm_compute/runtime/NEON/functions/NEQLSTMLayer.h |  2 +-
 src/runtime/CL/functions/CLQLSTMLayer.cpp         | 10 +++++-----
 src/runtime/NEON/functions/NEQLSTMLayer.cpp       |  8 ++++----
 4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index 53f337bc61..6e537680ee 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -113,7 +113,7 @@ public:
                    const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                    const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                    const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
-                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_in, ICLTensor *output_state_in,
                    ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                    const LSTMParams<ICLTensor> &lstm_params);
 
@@ -163,7 +163,7 @@ public:
                    const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                    const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                    const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
-                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_in, ICLTensor *output_state_in,
                    ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                    const LSTMParams<ICLTensor> &lstm_params);
 
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index 59dd567987..7c572de874 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -115,7 +115,7 @@ public:
                    const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                    const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                    const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
-                   const ITensor *cell_state_in, const ITensor *output_state_in,
+                   const ITensor *cell_state_in, ITensor *output_state_in,
                    ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
                    const LSTMParams<ITensor> &lstm_params);
 
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index a40a5d068d..15a54c7928 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -113,7 +113,7 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
                              const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                              const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                              const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
-                             ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                             ICLTensor *cell_state_in, ICLTensor *output_state_in,
                              ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                              const LSTMParams<ICLTensor> &lstm_params)
 {
@@ -126,7 +126,7 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
                              const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                              const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                              const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
-                             ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                             ICLTensor *cell_state_in, ICLTensor *output_state_in,
                              ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                              const LSTMParams<ICLTensor> &lstm_params)
 {
@@ -504,9 +504,9 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
         if(_projection_tensor_copy_required)
         {
             _hidden_gate.allocator()->allocate();
-            _projection_accumulate_res.allocator()->init(*output_state_out->info());
+            _projection_accumulate_res.allocator()->init(*output_state_in->info());
             _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
-            _projection_output_to_accumulate_copy.configure(*output_state_out, _projection_accumulate_res);
+            _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
             accumulate_destination = &_projection_accumulate_res;
         }
 
@@ -863,7 +863,7 @@ Status CLQLSTMLayer::validate(const ITensorInfo *input,
 
         if(projection_tensor_copy_required)
         {
-            ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(*output_state_out, projection_outstage_info));
+            ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
         }
 
         ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index cb45b647c7..5a6b51337a 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -105,7 +105,7 @@ void NEQLSTMLayer::configure(const ITensor *input,
                              const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                              const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                              const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
-                             const ITensor *cell_state_in, const ITensor *output_state_in,
+                             const ITensor *cell_state_in, ITensor *output_state_in,
                              ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
                              const LSTMParams<ITensor> &lstm_params)
 {
@@ -477,9 +477,9 @@ void NEQLSTMLayer::configure(const ITensor *input,
         if(_projection_tensor_copy_required)
         {
             _hidden_gate.allocator()->allocate();
-            _projection_accumulate_res.allocator()->init(*output_state_out->info());
+            _projection_accumulate_res.allocator()->init(*output_state_in->info());
             _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
-            _projection_output_to_accumulate_copy.configure(*output_state_out, _projection_accumulate_res);
+            _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
             accumulate_destination = &_projection_accumulate_res;
         }
 
@@ -834,7 +834,7 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
 
         if(projection_tensor_copy_required)
         {
-            ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(*output_state_out, projection_outstage_info));
+            ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
         }
 
         ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
-- 
cgit v1.2.1