author     Tim Hall <tim.hall@arm.com>  2021-06-08 21:25:57 +0100
committer  Tim Hall <tim.hall@arm.com>  2021-06-08 21:25:57 +0100
commit     d784af7e8995a10fb403157af48371699c35bbfe (patch)
tree       bf40b35b030d560049cef9411293b51e3d70ff4a /ethosu/vela/scheduler.py
parent     225e19d3640288e991475ee4c49cb3ffd83cc83b (diff)
MLBEDSW-4602: Fix Deepspeech scale & bias reuse issue.
- Deepspeech reuses identical weights and biases throughout the network. Since biases are now interleaved with weights, there is a scaling issue when the ifm scales differ between operations using the same weight and scale tensor.
- This commit uses interleaved weights/scales on their first use but separates scales to source memory on subsequent use (if the ifm scale is different).

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I7aae163438160a919cae04e235966e75355a6148
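To make the reuse rule concrete, below is a minimal, self-contained sketch (not Vela code) of the behaviour the commit message describes: a shared weight tensor is encoded with interleaved scales on first use, and only when a later consumer has a different ifm scale are the scales re-encoded into a separate source-memory tensor. The names encode_interleaved, encode_scales and get_encoded are hypothetical stand-ins for the real weight_compressor helpers, and the "encodings" are plain tuples.

# Hedged, self-contained sketch (not Vela code) of the reuse rule described above.
# Helper names are hypothetical stand-ins for the real weight_compressor functions.

def encode_interleaved(weights, ifm_scale):
    # First use: each weight is stored next to its scale, which depends on ifm_scale
    return tuple((w, w * ifm_scale) for w in weights)

def encode_scales(weights, ifm_scale):
    # Subsequent use with a different ifm scale: encode only the scales, to be
    # placed in a separate tensor in source memory
    return tuple(w * ifm_scale for w in weights)

_cache = {}  # id(shared weight tensor) -> (interleaved blob, ifm scale of first use)

def get_encoded(weights, ifm_scale):
    key = id(weights)
    if key not in _cache:
        blob = encode_interleaved(weights, ifm_scale)
        _cache[key] = (blob, ifm_scale)
        return blob, None                     # first use: interleaved weights/scales
    blob, first_scale = _cache[key]
    if ifm_scale == first_scale:
        return blob, None                     # identical scaling: safe to reuse as-is
    # Different ifm scale: reuse the interleaved weights, but supply separate scales
    return blob, encode_scales(weights, ifm_scale)

shared_weights = [1, 2, 3]
print(get_encoded(shared_weights, 0.5))       # first use
print(get_encoded(shared_weights, 0.25))      # reuse with a different ifm scale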
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r--  ethosu/vela/scheduler.py  14
1 file changed, 11 insertions(+), 3 deletions(-)
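Every hunk below follows one pattern: weight_compressor.encode_weight_and_scale_tensor now returns a (weights, scales) pair, and each call site stores both on the cost/op-info object. A hedged, self-contained sketch of that caller-side pattern follows; the stub ignores the real argument list (arch, op, weight/scale tensors, kernel, block config, depth slices) and is illustrative only.

# Illustrative stub only; the real encoder takes many more arguments.
def encode_weight_and_scale_tensor_stub():
    # Mirrors the new two-value return: (npu_weights_tensor, npu_scales_tensor)
    return "encoded-weights", "encoded-scales"

class CostSketch:
    def __init__(self):
        self.npu_weights_tensor = None
        self.npu_scales_tensor = None  # new field introduced by this commit

cost = CostSketch()
cost.npu_weights_tensor, cost.npu_scales_tensor = encode_weight_and_scale_tensor_stub()
print(cost.npu_weights_tensor, cost.npu_scales_tensor)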
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 00a4dfc7..71007a32 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -94,6 +94,7 @@ class SchedulerOpInfo:
         self.time_index = None  # Set by update_op_memory_snapshot
         self.ofm_depth_slices: List[int] = [0, stripe.depth]
         self.npu_weights_tensor = None
+        self.npu_scales_tensor = None
         self.buffered_weight_tensor = None
         self.cycles = None
         self.slack_buffering_cycles = 0
@@ -248,7 +249,10 @@ class SchedulerOperation:
         scheduler_op_info = SchedulerOpInfo(block_config, 0, ifm_shape, ifm2_shape, ofm_shape)
         if self.parent_op.weights:
             # Default full-depth weight encoding with no buffering
-            scheduler_op_info.npu_weights_tensor = weight_compressor.encode_weight_and_scale_tensor(
+            (
+                scheduler_op_info.npu_weights_tensor,
+                scheduler_op_info.npu_scales_tensor,
+            ) = weight_compressor.encode_weight_and_scale_tensor(
                 self.arch,
                 self.parent_op,
                 self.parent_op.weights,
@@ -537,7 +541,7 @@ class Scheduler:
         ofm_full_depth_slices = [0, ref_cost.stripe.depth]
 
         # Encode weights for the full depth
-        full_weights = weight_compressor.encode_weight_and_scale_tensor(
+        full_weights, full_scales = weight_compressor.encode_weight_and_scale_tensor(
             self.arch,
             sched_op.parent_op,
             weight_tensor,
@@ -552,9 +556,11 @@ class Scheduler:
         # No buffering required - take all the weights from permanent storage
         if sched_op.op_type == Op.FullyConnected or not needs_dma:
             cost.npu_weights_tensor = full_weights
+            cost.npu_scales_tensor = full_scales
             return
 
         encoded_weights = full_weights
+        encoded_scales = full_scales
 
         # How many NPU cycles are available under the previously executing
         # operator and SRAM unused for performing buffered DMA transfers
@@ -609,7 +615,7 @@ class Scheduler:
 
                 # Encode weights based depth slices
                 cost.ofm_depth_slices = depth_slices
-                encoded_weights = weight_compressor.encode_weight_and_scale_tensor(
+                encoded_weights, encoded_scales = weight_compressor.encode_weight_and_scale_tensor(
                     self.arch,
                     sched_op.parent_op,
                     weight_tensor,
@@ -665,8 +671,10 @@ class Scheduler:
             # Don't slice or buffer - use the whole depth from persistent storage
             cost.ofm_depth_slices = ofm_full_depth_slices
             encoded_weights = full_weights
+            encoded_scales = full_scales
 
         cost.npu_weights_tensor = encoded_weights
+        cost.npu_scales_tensor = encoded_scales
 
     def propose_minimal_schedule(self) -> Schedule:
         """Proposes scheduling parameters where every operator is subdivided into the smallest stripe that satisfies the