author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-09-11 10:04:15 +0200
---|---|---
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2020-09-17 08:18:50 +0000
commit | 1a66697b80a527af6d6dd1ed235199264696767e (patch) |
tree | 447f19903eedb0ed163348769da28267ccf3bf47 | /ethosu/vela/scheduler.py
parent | 1356c2ab034738bcf51822de18911cc499fa2e8e (diff) |
download | ethos-u-vela-1a66697b80a527af6d6dd1ed235199264696767e.tar.gz |
MLBEDSW-2809: Redo the Tensor addressing
Added a static class TensorAddressMap that stores all Tensor addresses
based on their equivalence_id. Made the "address" field into a property
whose getter and setter look up and set the tensor's address in the
TensorAddressMap.
This makes the cpu_tensor/npu_tensor references obsolete, so they have
been removed.
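
A minimal sketch of the scheme described above (illustrative only: the
actual change lands in ethosu/vela/tensor.py, which is outside this
diffstat, and the exact keying and method names are assumptions):

```python
from collections import defaultdict


class TensorAddressMap:
    # Maps equivalence_id -> allocated address, shared by all tensors
    # that are equivalent (e.g. CPU- and NPU-side views of one tensor).
    address_map = defaultdict(lambda: None)

    @classmethod
    def get_address_for_tens(cls, eq_id):
        return cls.address_map[eq_id]

    @classmethod
    def set_address_for_tens(cls, eq_id, address):
        prev = cls.address_map[eq_id]
        # Two different addresses must never be assigned to the same tensor
        assert prev is None or prev == address
        cls.address_map[eq_id] = address


class Tensor:
    def __init__(self, equivalence_id):
        self.equivalence_id = equivalence_id

    @property
    def address(self):
        # The getter looks the address up by equivalence_id...
        return TensorAddressMap.get_address_for_tens(self.equivalence_id)

    @address.setter
    def address(self, address):
        # ...and the setter stores it there, so equivalent tensors
        # automatically agree on a single address.
        TensorAddressMap.set_address_for_tens(self.equivalence_id, address)
```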
Addition to the scheduler: avoid SRAM spilling if an op has consumers
in other subgraphs (see the sketch below).
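
An illustration of the new guard (the stand-in classes here are
hypothetical, not Vela's real Tensor/Operation API; the actual check
appears in the scheduler.py hunk further down):

```python
class Tensor:
    def __init__(self):
        self.consumer_list = []


class Operation:
    def __init__(self, op_type):
        self.type = op_type
        self.outputs = []


def has_external_consumers(op):
    # Mirrors the added check: multiple outputs, or a single output with
    # more than one consumer, may be read from another subgraph, so the
    # feature map must not be spilled out of SRAM.
    return len(op.outputs) > 1 or len(op.outputs[0].consumer_list) > 1


# A single output feeding two consumers triggers the guard.
producer = Operation("Conv2D")
fm = Tensor()
producer.outputs = [fm]
fm.consumer_list = [Operation("Relu"), Operation("Reshape")]
assert has_external_consumers(producer)
```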
Minor rework in LUTState: it now assigns a unique equivalence_id to the
SHRAM lut tensor to avoid addressing issues. The equivalence checks in
LUTState now compare the LUT values instead of the equivalence_id.
Updated LUT unit tests accordingly.
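
A sketch of the value-based equivalence check (the shape of LUTState is
assumed; the actual code lives in ethosu/vela/lut.py, which is not part
of this diff):

```python
import numpy as np


class LUTState:
    # Tracks which LUTs currently reside in SHRAM
    def __init__(self):
        self.tensors = []

    def get_equivalent(self, lut_tens):
        # Compare the actual LUT contents; equivalence_id can no longer
        # be used here, since the SHRAM lut tensor gets a unique one.
        for t in self.tensors:
            if np.array_equal(t.values, lut_tens.values):
                return t
        return None
```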
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r-- | ethosu/vela/scheduler.py | 17 |
1 file changed, 14 insertions, 3 deletions
```diff
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index e9a93c19..47f8a47f 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -35,6 +35,7 @@ from .npu_performance import make_cycles_array
 from .npu_performance import make_macs_array
 from .npu_performance import make_metrics_arrays
 from .npu_performance import PassCycles
+from .numeric_util import full_shape
 from .operation import NpuBlockType
 from .shared_buffer_allocation import find_block_configs_suitable_for_pass_and_shared_buffer
 from .shared_buffer_allocation import shared_buffer_allocation_for_pass_and_block_config
@@ -43,7 +44,7 @@ from .tensor import MemType
 from .tensor import TensorFormat
 from .tensor import TensorPurpose
 from .tensor import TensorSubPurpose
-from .numeric_util import full_shape
+

 class ParetoMetric(enum.Enum):
     BwCycMem = 1
@@ -652,6 +653,9 @@ class DynamicProgrammingScheduler:
         for op in pred_candidate.ops:
             if op.type == "ConcatSliceWrite":
                 return True
+            if len(op.outputs) > 1 or len(op.outputs[0].consumer_list) > 1:
+                # The op has consumers in other subgraphs
+                return True
         return False

     def search_ifm_streaming_partial(self, ps, block_config):
@@ -976,8 +980,15 @@ class DynamicProgrammingScheduler:
             # be processed by CPU operations. No-op reshape consumers with empty lists
             # (those that have no consumers, or null-consumers used as list terminators)
             # must use normal NHWC output.
-            incompatible_consumers = [ (not consumer.run_on_npu or consumer.type == "Reshape" or (consumer is last_op_in_subgraph))
-                for consumer in op.outputs[0].consumer_list if consumer is not None ]
+            incompatible_consumers = [
+                (
+                    not consumer.run_on_npu
+                    or consumer.type == "Reshape"
+                    or (consumer is last_op_in_subgraph)
+                )
+                for consumer in op.outputs[0].consumer_list
+                if consumer is not None
+            ]
             if (outshape == inshape) and incompatible_consumers and not any(incompatible_consumers):
                 rewrites.append(op)
             else:
```