aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrik Gustavsson <patrik.gustavsson@arm.com> 2020-11-03 13:07:40 +0100
committer Patrik Gustavsson <patrik.gustavsson@arm.com> 2020-11-06 08:34:25 +0100
commit fad90c2db9e1b3f19f3a3700b17cf69ed08aea04 (patch)
tree 5fbdec33e86721fea95efbf8236052fdf0c0c88d
parent 65fd99830a762b2c59aaa446b55cbfa43a92f8ba (diff)
download ethos-u-vela-fad90c2db9e1b3f19f3a3700b17cf69ed08aea04.tar.gz
MLBEDSW-3212 Remove CLI opt ifm-ofm-overlap
Removed the CLI opt ifm-ofm-overlap

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I23faa0d10c3e71972c543e22e8155086fce73556
-rw-r--r-- OPTIONS.md 12
-rw-r--r-- ethosu/vela/compiler_driver.py 4
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py 4
-rw-r--r-- ethosu/vela/live_range.py 27
-rw-r--r-- ethosu/vela/scheduler.py 4
-rw-r--r-- ethosu/vela/tensor_allocation.py 2
-rw-r--r-- ethosu/vela/vela.py 8
7 files changed, 6 insertions, 55 deletions
diff --git a/OPTIONS.md b/OPTIONS.md
index 07cf78e..a7b513c 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -72,18 +72,6 @@ system's SRAM this optimisation is required.
vela network.tflite --cascading False
```
-### IFM/OFM Overlap
-
-Controls the overlapping of IFM and OFM buffers. This means that IFM and OFM
-buffers may overlap if possible. This allows for lower memory usage.
-**Type: Boolean**
-**Default: True**
-
-```bash
-vela network.tflite --ifm-ofm-overlap False
-```
-
-
### Force Block Config
Force a specific block configuration in the format HxWxC, where H, W, and C are
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 1d7521b..9263305 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -194,7 +194,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
arch,
permanent_storage,
set((MemType.Permanent_NPU,)),
- use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap,
tensor_allocator=TensorAllocator.LinearAlloc,
verbose_allocation=options.verbose_allocation,
show_minimum_possible_allocation=options.show_minimum_possible_allocation,
@@ -232,7 +231,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
arch,
mem_area,
mem_type_set,
- use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap,
tensor_allocator=options.tensor_allocator,
verbose_allocation=options.verbose_allocation,
show_minimum_possible_allocation=options.show_minimum_possible_allocation,
@@ -259,7 +257,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
mem_type_set,
max_size=arch.sram_size,
dry_test=dry_test,
- use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap,
tensor_allocator=options.tensor_allocator,
verbose_allocation=options.verbose_allocation,
show_minimum_possible_allocation=options.show_minimum_possible_allocation,
@@ -305,7 +302,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
arch,
permanent_storage,
set((MemType.Permanent_CPU,)),
- use_ifm_ofm_overlap=scheduler_options.use_ifm_ofm_overlap,
tensor_allocator=TensorAllocator.LinearAlloc,
verbose_allocation=options.verbose_allocation,
show_minimum_possible_allocation=options.show_minimum_possible_allocation,
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 3e3cda1..01fab0e 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -394,7 +394,3 @@ def calc_allowed_ofm_ifm_overlap_for_pass_list(strat, passes, block_configs):
min_overlap = max(min_overlap, 0)
return min_overlap
-
-
-def calc_allowed_ofm_ifm_overlap_for_cascaded_pass(cps):
- return calc_allowed_ofm_ifm_overlap_for_pass_list(cps.strategy, cps.passes, [ps.block_config for ps in cps.passes])
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index b884035..a29cafe 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -16,7 +16,6 @@
# Description:
# Build a live range graph for tensors in one or more subgraphs. Used for tensor allocation as well as in the scheduler.
# Can work with either a pass packed subgraph or a scheduled subgraph.
-from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_cascaded_pass
from .nn_graph import PassPlacement
from .operation import Op
from .tensor import MemType
@@ -101,7 +100,6 @@ class LiveRange:
class LiveRangeGraph:
def __init__(self):
self.ranges = {} # tens -> range
- self.allowed_overlaps = {} # (tens,tens) -> overlap_int
self.ignore_tensors = set()
self.processed_subgraphs = set()
self.current_time = 0
@@ -198,7 +196,7 @@ def merge_elementwise_op_ranges(ps, lr_graph, target_mem_area, target_mem_type_s
def extract_live_ranges_from_passes(
sg,
target_mem_area,
- target_mem_type=set((MemType.Scratch, MemType.Scratch_fast)),
+ target_mem_type_set=set((MemType.Scratch, MemType.Scratch_fast)),
ignore_subgraph_input_output_tensors=False,
):
lr_graph = LiveRangeGraph()
@@ -209,7 +207,7 @@ def extract_live_ranges_from_passes(
# Try to merge live ranges of operations in the NPU subgraphs
if sg.placement == PassPlacement.Npu:
- merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type)
+ merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type_set)
for idx, ps in enumerate(sg.passes):
ps.time = 2 * idx
@@ -217,14 +215,14 @@ def extract_live_ranges_from_passes(
time_for_pass = ps.time
for tens in ps.inputs + ps.intermediates + ps.outputs:
- if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type):
+ if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):
continue
rng = lr_graph.get_or_create_range(tens)
rng.mark_usage(time_for_pass)
end_time = len(sg.passes) * 2
for tens in sg.output_tensors:
- if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type):
+ if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):
continue
rng = lr_graph.get_or_create_range(tens)
rng.mark_usage(end_time)
@@ -236,7 +234,6 @@ def extract_live_ranges_from_cascaded_passes(
sg,
target_mem_area,
target_mem_type_set,
- use_ifm_ofm_overlap=True,
ignore_subgraph_input_output_tensors=False,
lr_graph=None,
allocation_alignment=Tensor.AllocationQuantum,
@@ -279,7 +276,7 @@ def extract_live_ranges_from_cascaded_passes(
# Use default allocation alignment of 16 for Npu tensors
npu_sg = cps_primary_op.attrs["subgraph"]
lr_graph = extract_live_ranges_from_cascaded_passes(
- npu_sg, target_mem_area, target_mem_type_set, use_ifm_ofm_overlap, False, lr_graph,
+ npu_sg, target_mem_area, target_mem_type_set, False, lr_graph,
)
# Set the new time after handling the Npu subgraph
time_for_pass = lr_graph.current_time
@@ -291,20 +288,6 @@ def extract_live_ranges_from_cascaded_passes(
rng = lr_graph.get_or_create_range(tens, allocation_alignment)
rng.mark_usage(time_for_pass)
- if use_ifm_ofm_overlap:
- # fill allowed overlap for ifm and ofm tensor
- ifm_tensor = cps.passes[0].ifm_tensor
- ofm_tensor = cps.passes[-1].ofm_tensor
- if (
- ifm_tensor is not None
- and ofm_tensor is not None
- and not tensor_should_be_ignored(lr_graph, ifm_tensor, target_mem_area, target_mem_type_set)
- and not tensor_should_be_ignored(lr_graph, ofm_tensor, target_mem_area, target_mem_type_set)
- ):
- lr_graph.allowed_overlaps[(ifm_tensor, ofm_tensor)] = calc_allowed_ofm_ifm_overlap_for_cascaded_pass(
- cps
- )
-
lr_graph.current_time += 2
end_time = 0
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 31e6383..59c2b58 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -59,7 +59,6 @@ class SchedulerOptions:
def __init__(
self,
use_cascading=True,
- use_ifm_ofm_overlap=True,
verbose_schedule=False,
verbose_pareto_frontier_schedules=False,
use_ifm_streaming=True,
@@ -67,7 +66,6 @@ class SchedulerOptions:
use_nhcwb16_between_cascaded_passes=True,
):
self.use_cascading = use_cascading
- self.use_ifm_ofm_overlap = use_ifm_ofm_overlap
self.verbose_schedule = verbose_schedule
self.verbose_pareto_frontier_schedules = verbose_pareto_frontier_schedules
self.use_ifm_streaming = use_ifm_streaming
@@ -236,7 +234,7 @@ class DynamicProgrammingScheduler:
if self.arch.feature_map_storage_mem_area != MemArea.Sram:
self.use_ifm_ofm_overlap = False # force off IFM/OFM overlap if IFMs and OFMs are not in the SRAM
else:
- self.use_ifm_ofm_overlap = options.use_ifm_ofm_overlap
+ self.use_ifm_ofm_overlap = True
self.verbose_schedule = options.verbose_schedule
self.verbose_pareto_frontier_schedules = options.verbose_pareto_frontier_schedules
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 9f14ec4..8329a61 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -122,7 +122,6 @@ def allocate_tensors(
arch,
mem_area,
mem_type_set,
- use_ifm_ofm_overlap=True,
tensor_allocator=TensorAllocator.Greedy,
verbose_allocation=False,
show_minimum_possible_allocation=False,
@@ -137,7 +136,6 @@ def allocate_tensors(
sg,
mem_area,
mem_type_set,
- use_ifm_ofm_overlap=use_ifm_ofm_overlap,
ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors,
lr_graph=lr_graph,
allocation_alignment=allocation_alignment,
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index b9e224c..6d54187 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -158,13 +158,6 @@ def main(args=None):
choices=[True, False],
help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
)
- parser.add_argument(
- "--ifm-ofm-overlap",
- type=ast.literal_eval,
- default=True,
- choices=[True, False],
- help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
- )
parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
parser.add_argument(
@@ -313,7 +306,6 @@ def main(args=None):
scheduler_options = scheduler.SchedulerOptions(
use_cascading=args.cascading,
- use_ifm_ofm_overlap=args.ifm_ofm_overlap,
verbose_schedule=args.verbose_schedule,
verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
use_ifm_streaming=args.ifm_streaming,