author     Tim Hall <tim.hall@arm.com>  2021-06-08 21:25:57 +0100
committer  Tim Hall <tim.hall@arm.com>  2021-06-08 21:25:57 +0100
commit     d784af7e8995a10fb403157af48371699c35bbfe (patch)
tree       bf40b35b030d560049cef9411293b51e3d70ff4a /ethosu/vela/scheduler.py
parent     225e19d3640288e991475ee4c49cb3ffd83cc83b (diff)
MLBEDSW-4602: Fix Deepspeech scale & bias reuse issue.
- Deepspeech reuses identical weights and biases throughout the network. Since biases are now interleaved with weights, there is a scaling issue when the ifm scales differ between operations using the same weight and scale tensor.
- This commit uses interleaved weights/scales on their first use but separates scales to source memory on subsequent use (if the ifm scale is different).

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I7aae163438160a919cae04e235966e75355a6148
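To make the reuse rule concrete, below is a minimal, self-contained sketch (not Vela code) of the behaviour the commit message describes: a shared weight tensor is encoded with interleaved scales on first use, and only when a later consumer has a different ifm scale are the scales re-encoded into a separate source-memory tensor. The names encode_interleaved, encode_scales and get_encoded are hypothetical stand-ins for the real weight_compressor helpers, and the "encodings" are plain tuples.

# Hedged, self-contained sketch (not Vela code) of the reuse rule described above.
# Helper names are hypothetical stand-ins for the real weight_compressor functions.

def encode_interleaved(weights, ifm_scale):
    # First use: each weight is stored next to its scale, which depends on ifm_scale
    return tuple((w, w * ifm_scale) for w in weights)

def encode_scales(weights, ifm_scale):
    # Subsequent use with a different ifm scale: encode only the scales, to be
    # placed in a separate tensor in source memory
    return tuple(w * ifm_scale for w in weights)

_cache = {}  # id(shared weight tensor) -> (interleaved blob, ifm scale of first use)

def get_encoded(weights, ifm_scale):
    key = id(weights)
    if key not in _cache:
        blob = encode_interleaved(weights, ifm_scale)
        _cache[key] = (blob, ifm_scale)
        return blob, None                     # first use: interleaved weights/scales
    blob, first_scale = _cache[key]
    if ifm_scale == first_scale:
        return blob, None                     # identical scaling: safe to reuse as-is
    # Different ifm scale: reuse the interleaved weights, but supply separate scales
    return blob, encode_scales(weights, ifm_scale)

shared_weights = [1, 2, 3]
print(get_encoded(shared_weights, 0.5))       # first use
print(get_encoded(shared_weights, 0.25))      # reuse with a different ifm scale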
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r--  ethosu/vela/scheduler.py  14
1 file changed, 11 insertions(+), 3 deletions(-)
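Every hunk below follows one pattern: weight_compressor.encode_weight_and_scale_tensor now returns a (weights, scales) pair, and each call site stores both on the cost/op-info object. A hedged, self-contained sketch of that caller-side pattern follows; the stub ignores the real argument list (arch, op, weight/scale tensors, kernel, block config, depth slices) and is illustrative only.

# Illustrative stub only; the real encoder takes many more arguments.
def encode_weight_and_scale_tensor_stub():
    # Mirrors the new two-value return: (npu_weights_tensor, npu_scales_tensor)
    return "encoded-weights", "encoded-scales"

class CostSketch:
    def __init__(self):
        self.npu_weights_tensor = None
        self.npu_scales_tensor = None  # new field introduced by this commit

cost = CostSketch()
cost.npu_weights_tensor, cost.npu_scales_tensor = encode_weight_and_scale_tensor_stub()
print(cost.npu_weights_tensor, cost.npu_scales_tensor)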
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 00a4dfc7..71007a32 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -94,6 +94,7 @@ class SchedulerOpInfo:
         self.time_index = None  # Set by update_op_memory_snapshot
         self.ofm_depth_slices: List[int] = [0, stripe.depth]
         self.npu_weights_tensor = None
+        self.npu_scales_tensor = None
         self.buffered_weight_tensor = None
         self.cycles = None
         self.slack_buffering_cycles = 0
@@ -248,7 +249,10 @@ class SchedulerOperation:
         scheduler_op_info = SchedulerOpInfo(block_config, 0, ifm_shape, ifm2_shape, ofm_shape)
         if self.parent_op.weights:
             # Default full-depth weight encoding with no buffering
-            scheduler_op_info.npu_weights_tensor = weight_compressor.encode_weight_and_scale_tensor(
+            (
+                scheduler_op_info.npu_weights_tensor,
+                scheduler_op_info.npu_scales_tensor,
+            ) = weight_compressor.encode_weight_and_scale_tensor(
                 self.arch,
                 self.parent_op,
                 self.parent_op.weights,
@@ -537,7 +541,7 @@ class Scheduler:
         ofm_full_depth_slices = [0, ref_cost.stripe.depth]
 
         # Encode weights for the full depth
-        full_weights = weight_compressor.encode_weight_and_scale_tensor(
+        full_weights, full_scales = weight_compressor.encode_weight_and_scale_tensor(
             self.arch,
             sched_op.parent_op,
             weight_tensor,
@@ -552,9 +556,11 @@ class Scheduler:
         # No buffering required - take all the weights from permanent storage
         if sched_op.op_type == Op.FullyConnected or not needs_dma:
             cost.npu_weights_tensor = full_weights
+            cost.npu_scales_tensor = full_scales
             return
 
         encoded_weights = full_weights
+        encoded_scales = full_scales
 
         # How many NPU cycles are available under the previously executing
         # operator and SRAM unused for performing buffered DMA transfers
@@ -609,7 +615,7 @@ class Scheduler:
 
                 # Encode weights based depth slices
                 cost.ofm_depth_slices = depth_slices
-                encoded_weights = weight_compressor.encode_weight_and_scale_tensor(
+                encoded_weights, encoded_scales = weight_compressor.encode_weight_and_scale_tensor(
                     self.arch,
                     sched_op.parent_op,
                     weight_tensor,
@@ -665,8 +671,10 @@ class Scheduler:
             # Don't slice or buffer - use the whole depth from persistent storage
             cost.ofm_depth_slices = ofm_full_depth_slices
             encoded_weights = full_weights
+            encoded_scales = full_scales
 
         cost.npu_weights_tensor = encoded_weights
+        cost.npu_scales_tensor = encoded_scales
 
     def propose_minimal_schedule(self) -> Schedule:
         """Proposes scheduling parameters where every operator is subdivided into the smallest stripe that satisfies the