From fad90c2db9e1b3f19f3a3700b17cf69ed08aea04 Mon Sep 17 00:00:00 2001 From: Patrik Gustavsson Date: Tue, 3 Nov 2020 13:07:40 +0100 Subject: MLBEDSW-3212 Remove CLI opt ifm-ofm-overlap Removed the CLI opt ifm-ofm-overlap Signed-off-by: Patrik Gustavsson Change-Id: I23faa0d10c3e71972c543e22e8155086fce73556 --- ethosu/vela/compiler_driver.py | 4 ---- ethosu/vela/high_level_command_stream_generator.py | 4 ---- ethosu/vela/live_range.py | 27 ++++------------------ ethosu/vela/scheduler.py | 4 +--- ethosu/vela/tensor_allocation.py | 2 -- ethosu/vela/vela.py | 8 ------- 6 files changed, 6 insertions(+), 43 deletions(-) (limited to 'ethosu/vela') diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py index 1d7521b1..9263305a 100644 --- a/ethosu/vela/compiler_driver.py +++ b/ethosu/vela/compiler_driver.py @@ -194,7 +194,6 @@ def compiler_driver(nng, arch, options, scheduler_options): arch, permanent_storage, set((MemType.Permanent_NPU,)), - use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap, tensor_allocator=TensorAllocator.LinearAlloc, verbose_allocation=options.verbose_allocation, show_minimum_possible_allocation=options.show_minimum_possible_allocation, @@ -232,7 +231,6 @@ def compiler_driver(nng, arch, options, scheduler_options): arch, mem_area, mem_type_set, - use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap, tensor_allocator=options.tensor_allocator, verbose_allocation=options.verbose_allocation, show_minimum_possible_allocation=options.show_minimum_possible_allocation, @@ -259,7 +257,6 @@ def compiler_driver(nng, arch, options, scheduler_options): mem_type_set, max_size=arch.sram_size, dry_test=dry_test, - use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap, tensor_allocator=options.tensor_allocator, verbose_allocation=options.verbose_allocation, show_minimum_possible_allocation=options.show_minimum_possible_allocation, @@ -305,7 +302,6 @@ def compiler_driver(nng, arch, options, scheduler_options): arch, permanent_storage, set((MemType.Permanent_CPU,)), - use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap, tensor_allocator=TensorAllocator.LinearAlloc, verbose_allocation=options.verbose_allocation, show_minimum_possible_allocation=options.show_minimum_possible_allocation, diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index 3e3cda19..01fab0e8 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -394,7 +394,3 @@ def calc_allowed_ofm_ifm_overlap_for_pass_list(strat, passes, block_configs): min_overlap = max(min_overlap, 0) return min_overlap - - -def calc_allowed_ofm_ifm_overlap_for_cascaded_pass(cps): - return calc_allowed_ofm_ifm_overlap_for_pass_list(cps.strategy, cps.passes, [ps.block_config for ps in cps.passes]) diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py index b8840355..a29cafe0 100644 --- a/ethosu/vela/live_range.py +++ b/ethosu/vela/live_range.py @@ -16,7 +16,6 @@ # Description: # Build a live range graph for tensors in one or more subgraphs. Used for tensor allocation as well as in the scheduler. # Can work with either a pass packed subgraph or a scheduled subgraph. -from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_cascaded_pass from .nn_graph import PassPlacement from .operation import Op from .tensor import MemType @@ -101,7 +100,6 @@ class LiveRange: class LiveRangeGraph: def __init__(self): self.ranges = {} # tens -> range - self.allowed_overlaps = {} # (tens,tens) -> overlap_int self.ignore_tensors = set() self.processed_subgraphs = set() self.current_time = 0 @@ -198,7 +196,7 @@ def merge_elementwise_op_ranges(ps, lr_graph, target_mem_area, target_mem_type_s def extract_live_ranges_from_passes( sg, target_mem_area, - target_mem_type=set((MemType.Scratch, MemType.Scratch_fast)), + target_mem_type_set=set((MemType.Scratch, MemType.Scratch_fast)), ignore_subgraph_input_output_tensors=False, ): lr_graph = LiveRangeGraph() @@ -209,7 +207,7 @@ def extract_live_ranges_from_passes( # Try to merge live ranges of operations in the NPU subgraphs if sg.placement == PassPlacement.Npu: - merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type) + merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type_set) for idx, ps in enumerate(sg.passes): ps.time = 2 * idx @@ -217,14 +215,14 @@ def extract_live_ranges_from_passes( time_for_pass = ps.time for tens in ps.inputs + ps.intermediates + ps.outputs: - if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type): + if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set): continue rng = lr_graph.get_or_create_range(tens) rng.mark_usage(time_for_pass) end_time = len(sg.passes) * 2 for tens in sg.output_tensors: - if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type): + if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set): continue rng = lr_graph.get_or_create_range(tens) rng.mark_usage(end_time) @@ -236,7 +234,6 @@ def extract_live_ranges_from_cascaded_passes( sg, target_mem_area, target_mem_type_set, - use_ifm_ofm_overlap=True, ignore_subgraph_input_output_tensors=False, lr_graph=None, allocation_alignment=Tensor.AllocationQuantum, @@ -279,7 +276,7 @@ def extract_live_ranges_from_cascaded_passes( # Use default allocation alignment of 16 for Npu tensors npu_sg = cps_primary_op.attrs["subgraph"] lr_graph = extract_live_ranges_from_cascaded_passes( - npu_sg, target_mem_area, target_mem_type_set, use_ifm_ofm_overlap, False, lr_graph, + npu_sg, target_mem_area, target_mem_type_set, False, lr_graph, ) # Set the new time after handling the Npu subgraph time_for_pass = lr_graph.current_time @@ -291,20 +288,6 @@ def extract_live_ranges_from_cascaded_passes( rng = lr_graph.get_or_create_range(tens, allocation_alignment) rng.mark_usage(time_for_pass) - if use_ifm_ofm_overlap: - # fill allowed overlap for ifm and ofm tensor - ifm_tensor = cps.passes[0].ifm_tensor - ofm_tensor = cps.passes[-1].ofm_tensor - if ( - ifm_tensor is not None - and ofm_tensor is not None - and not tensor_should_be_ignored(lr_graph, ifm_tensor, target_mem_area, target_mem_type_set) - and not tensor_should_be_ignored(lr_graph, ofm_tensor, target_mem_area, target_mem_type_set) - ): - lr_graph.allowed_overlaps[(ifm_tensor, ofm_tensor)] = calc_allowed_ofm_ifm_overlap_for_cascaded_pass( - cps - ) - lr_graph.current_time += 2 end_time = 0 diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 31e6383a..59c2b58f 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -59,7 +59,6 @@ class SchedulerOptions: def __init__( self, use_cascading=True, - use_ifm_ofm_overlap=True, verbose_schedule=False, verbose_pareto_frontier_schedules=False, use_ifm_streaming=True, @@ -67,7 +66,6 @@ class SchedulerOptions: use_nhcwb16_between_cascaded_passes=True, ): self.use_cascading = use_cascading - self.use_ifm_ofm_overlap = use_ifm_ofm_overlap self.verbose_schedule = verbose_schedule self.verbose_pareto_frontier_schedules = verbose_pareto_frontier_schedules self.use_ifm_streaming = use_ifm_streaming @@ -236,7 +234,7 @@ class DynamicProgrammingScheduler: if self.arch.feature_map_storage_mem_area != MemArea.Sram: self.use_ifm_ofm_overlap = False # force off IFM/OFM overlap if IFMs and OFMs are not in the SRAM else: - self.use_ifm_ofm_overlap = options.use_ifm_ofm_overlap + self.use_ifm_ofm_overlap = True self.verbose_schedule = options.verbose_schedule self.verbose_pareto_frontier_schedules = options.verbose_pareto_frontier_schedules diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index 9f14ec4c..8329a617 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -122,7 +122,6 @@ def allocate_tensors( arch, mem_area, mem_type_set, - use_ifm_ofm_overlap=True, tensor_allocator=TensorAllocator.Greedy, verbose_allocation=False, show_minimum_possible_allocation=False, @@ -137,7 +136,6 @@ def allocate_tensors( sg, mem_area, mem_type_set, - use_ifm_ofm_overlap=use_ifm_ofm_overlap, ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors, lr_graph=lr_graph, allocation_alignment=allocation_alignment, diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py index b9e224cb..6d54187c 100644 --- a/ethosu/vela/vela.py +++ b/ethosu/vela/vela.py @@ -158,13 +158,6 @@ def main(args=None): choices=[True, False], help="Controls the packing of multiple passes into a cascade (default: %(default)s)", ) - parser.add_argument( - "--ifm-ofm-overlap", - type=ast.literal_eval, - default=True, - choices=[True, False], - help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)", - ) parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC") parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations") parser.add_argument( @@ -313,7 +306,6 @@ def main(args=None): scheduler_options = scheduler.SchedulerOptions( use_cascading=args.cascading, - use_ifm_ofm_overlap=args.ifm_ofm_overlap, verbose_schedule=args.verbose_schedule, verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules, use_ifm_streaming=args.ifm_streaming, -- cgit v1.2.1