From 4c00abb83487b3c8445f6db9140de44dec71efda Mon Sep 17 00:00:00 2001
From: Jacob Bohlin
Date: Tue, 15 Jun 2021 14:06:23 +0200
Subject: MLBEDSW-4644 Removed unnecessary LUT DMA commands

Fixed a bug where a DMA command for the activation LUT would be issued
for every depth-slice of an operator. This caused multiple unnecessary
DMA commands.

Signed-off-by: Jacob Bohlin
Change-Id: I9c291692d8002f05656bb88214836ab389a56cdb
---
 ethosu/vela/high_level_command_stream_generator.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'ethosu/vela')

diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 5a838f88..6fcf80cb 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -128,6 +128,11 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
         for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
             end_width = min(start_width + ofm_step.width, ofm_end.width)
 
+            if parent_op.activation_lut:
+                lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
+                lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
+                yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)
+
             for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
                 start_channel = max(start_channel, ofm_start.depth)
                 end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)
@@ -196,11 +201,6 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
                 else:
                     weight_box = None
 
-                if parent_op.activation_lut:
-                    lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
-                    lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
-                    yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)
-
                 yield NpuStripe(
                     sched_op.parent_ps,
                     block_config.old_style_representation(),
--
cgit v1.2.1