about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jacob Bohlin <jacob.bohlin@arm.com> 2021-06-15 14:06:23 +0200
committer: Jacob Bohlin <jacob.bohlin@arm.com> 2021-06-16 09:04:57 +0200
commit: 4c00abb83487b3c8445f6db9140de44dec71efda (patch)
tree: e3a68b3865fed12deaaec862819e5163c1dc9069
parent: 3e4168d741c167d2d52b1a3fc9a800c101bba09b (diff)
download: ethos-u-vela-4c00abb83487b3c8445f6db9140de44dec71efda.tar.gz
MLBEDSW-4644 Removed unnecessary LUT DMA commands
Fixed a bug where a DMA command for the activation LUT would be issued for every depth-slice of an operator. This caused multiple unnecessary DMA commands.

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I9c291692d8002f05656bb88214836ab389a56cdb
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 5a838f88..6fcf80cb 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -128,6 +128,11 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
end_width = min(start_width + ofm_step.width, ofm_end.width)
+ if parent_op.activation_lut:
+ lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
+ lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
+ yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)
+
for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
start_channel = max(start_channel, ofm_start.depth)
end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)
@@ -196,11 +201,6 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
else:
weight_box = None
- if parent_op.activation_lut:
- lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
- lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
- yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)
-
yield NpuStripe(
sched_op.parent_ps,
block_config.old_style_representation(),