Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r--  ethosu/vela/scheduler.py  14
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 00a4dfc7..71007a32 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -94,6 +94,7 @@ class SchedulerOpInfo:
         self.time_index = None  # Set by update_op_memory_snapshot
         self.ofm_depth_slices: List[int] = [0, stripe.depth]
         self.npu_weights_tensor = None
+        self.npu_scales_tensor = None
         self.buffered_weight_tensor = None
         self.cycles = None
         self.slack_buffering_cycles = 0
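The hunk above adds a slot for the encoded scales next to the encoded weights on SchedulerOpInfo. As a rough illustration only (not the real class, which also takes a block config and stripe shapes), the state it carries after this patch looks like this:

from typing import Any, List, Optional


class SchedulerOpInfoSketch:
    """Illustrative stand-in for SchedulerOpInfo after this patch, not the real class."""

    def __init__(self, ofm_depth: int) -> None:
        self.ofm_depth_slices: List[int] = [0, ofm_depth]
        self.npu_weights_tensor: Optional[Any] = None
        self.npu_scales_tensor: Optional[Any] = None  # new field: encoded scales kept next to the weights
        self.buffered_weight_tensor: Optional[Any] = None

    def has_encoded_pair(self) -> bool:
        # Hypothetical helper: the scheduler is expected to set weights and scales together.
        return self.npu_weights_tensor is not None and self.npu_scales_tensor is not None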
@@ -248,7 +249,10 @@ class SchedulerOperation:
         scheduler_op_info = SchedulerOpInfo(block_config, 0, ifm_shape, ifm2_shape, ofm_shape)
         if self.parent_op.weights:
             # Default full-depth weight encoding with no buffering
-            scheduler_op_info.npu_weights_tensor = weight_compressor.encode_weight_and_scale_tensor(
+            (
+                scheduler_op_info.npu_weights_tensor,
+                scheduler_op_info.npu_scales_tensor,
+            ) = weight_compressor.encode_weight_and_scale_tensor(
                 self.arch,
                 self.parent_op,
                 self.parent_op.weights,
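This hunk switches the call site to tuple unpacking because encode_weight_and_scale_tensor now returns the encoded weights and the encoded scales as two values. A minimal sketch of that calling convention, using an invented stub in place of the real weight_compressor function (whose full parameter list is trimmed here):

from typing import Any, Optional, Tuple


def encode_weight_and_scale_tensor_stub(*args: Any, **kwargs: Any) -> Tuple[Optional[Any], Optional[Any]]:
    # Stand-in for weight_compressor.encode_weight_and_scale_tensor: after this
    # patch it returns a (weights, scales) pair instead of a single tensor.
    npu_weights_tensor = object()  # placeholder for the encoded weight stream
    npu_scales_tensor = object()  # placeholder for the encoded scale/bias stream
    return npu_weights_tensor, npu_scales_tensor


# Call sites unpack both values, mirroring the assignment in the hunk above:
weights, scales = encode_weight_and_scale_tensor_stub()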
@@ -537,7 +541,7 @@ class Scheduler:
         ofm_full_depth_slices = [0, ref_cost.stripe.depth]
 
         # Encode weights for the full depth
-        full_weights = weight_compressor.encode_weight_and_scale_tensor(
+        full_weights, full_scales = weight_compressor.encode_weight_and_scale_tensor(
             self.arch,
             sched_op.parent_op,
             weight_tensor,
@@ -552,9 +556,11 @@ class Scheduler:
         # No buffering required - take all the weights from permanent storage
         if sched_op.op_type == Op.FullyConnected or not needs_dma:
             cost.npu_weights_tensor = full_weights
+            cost.npu_scales_tensor = full_scales
             return
 
         encoded_weights = full_weights
+        encoded_scales = full_scales
 
         # How many NPU cycles are available under the previously executing
         # operator and SRAM unused for performing buffered DMA transfers
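From this hunk onwards the proposal keeps the two encodings as a pair: the no-buffering path above stores full_weights and full_scales directly, while the hunks below either re-encode both per depth slice or fall back to the full-depth pair. A condensed sketch of that pattern, with invented names and without the real DMA and slicing logic:

from typing import Any, Optional, Tuple


def assign_encoded_pair(
    cost: Any,
    full_pair: Tuple[Any, Any],
    sliced_pair: Optional[Tuple[Any, Any]] = None,
) -> None:
    # Use the depth-sliced encoding when one was produced, otherwise fall back
    # to the full-depth encoding; the weights and scales always travel together.
    encoded_weights, encoded_scales = sliced_pair if sliced_pair is not None else full_pair
    cost.npu_weights_tensor = encoded_weights
    cost.npu_scales_tensor = encoded_scales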
@@ -609,7 +615,7 @@ class Scheduler:
 
                 # Encode weights based depth slices
                 cost.ofm_depth_slices = depth_slices
-                encoded_weights = weight_compressor.encode_weight_and_scale_tensor(
+                encoded_weights, encoded_scales = weight_compressor.encode_weight_and_scale_tensor(
                     self.arch,
                     sched_op.parent_op,
                     weight_tensor,
@@ -665,8 +671,10 @@ class Scheduler:
             # Don't slice or buffer - use the whole depth from persistent storage
             cost.ofm_depth_slices = ofm_full_depth_slices
             encoded_weights = full_weights
+            encoded_scales = full_scales
 
         cost.npu_weights_tensor = encoded_weights
+        cost.npu_scales_tensor = encoded_scales
 
     def propose_minimal_schedule(self) -> Schedule:
         """Proposes scheduling parameters where every operator is subdivided into the smallest stripe that satisfies the