author     Tim Hall <tim.hall@arm.com>  2020-08-26 17:27:19 +0100
committer  Tim Hall <tim.hall@arm.com>  2020-08-26 18:32:45 +0100
commit     ba69518cef84a495c104e51d100875cdca717a22 (patch)
tree       a3edea8ed07b9cc58dbaa43859506c901cffb012 /ethosu/vela/scheduler.py
parent     e55e274c6e894c45204702cbeb475d8f0a7c595e (diff)
download   ethos-u-vela-ba69518cef84a495c104e51d100875cdca717a22.tar.gz
MLBEDSW-2686: Use NPU tensor format for noop reshapes (tag: 1.2.0.rc2)
- Reshapes that merely add or remove dimensions, rather than re-laying out
  the data, need not fall back to NHWC. This commit allows reshapes between
  NPU operators to use NHCWB16.
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: Ieb7745e586bf324e92e741a04b74caf7285f4b8b
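
The change hinges on what counts as a no-op reshape: one whose input and output describe the same memory layout once both shapes are normalised to 4D. Below is a minimal sketch of that test, assuming full_shape(dim, shape, fill) left-pads a shape with `fill` values until it has at least `dim` dimensions, matching how the diff below calls it; the helper here is illustrative, not vela's actual implementation.

# Illustrative sketch only; assumes full_shape left-pads to `dim` dimensions.
def full_shape(dim, shape, fill):
    # e.g. [8, 8, 16] -> [1, 8, 8, 16] for dim=4, fill=1
    return [fill] * (dim - len(shape)) + list(shape)

def reshape_is_noop(inshape, outshape):
    # Adding/removing unit dimensions leaves the underlying data untouched,
    # so both shapes normalise to the same 4D shape.
    return full_shape(4, inshape, 1) == full_shape(4, outshape, 1)

assert reshape_is_noop([1, 8, 8, 16], [8, 8, 16])         # drops a unit dim: no-op
assert not reshape_is_noop([1, 8, 8, 16], [1, 8, 16, 8])  # re-lays out the data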
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r--  ethosu/vela/scheduler.py  35
1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 9a8215d5..9b492f01 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -42,7 +42,7 @@ from .tensor import MemType
 from .tensor import TensorFormat
 from .tensor import TensorPurpose
 from .tensor import TensorSubPurpose
-
+from .numeric_util import full_shape
 
 class ParetoMetric(enum.Enum):
     BwCycMem = 1
@@ -957,21 +957,36 @@ class DynamicProgrammingScheduler:
             if ps.placement != PassPlacement.Npu:
                 continue
             for output in ps.outputs:
-                if output.purpose != TensorPurpose.FeatureMap:
+                if output.purpose != TensorPurpose.FeatureMap or output.avoid_NHCWB16:
                     continue
 
-                use_NHCWB16 = not output.avoid_NHCWB16
-
-                if use_NHCWB16:
-                    # Check consumers, to see if NHCWB16 can be used in the output
-                    for op in output.consumer_list:
-                        if op is None or op.type == "Reshape":
-                            use_NHCWB16 = False
+                use_NHCWB16 = True
+                rewrites = []
+                for op in output.consumer_list:
+                    if op is None:
+                        use_NHCWB16 = False
+                    elif op.type == "Reshape":
+                        # Detect no-op reshapes by comparing their full input and output tensor shapes.
+                        inshape = full_shape(4, op.inputs[0].shape, 1)
+                        outshape = full_shape(4, op.outputs[0].shape, 1)
+                        # Using NHCWB16 format for a no-op reshape is only an option if subsequent
+                        # consumers do not also need to perform a reshape or if the OFM is going to
+                        # be processed by CPU operations. No-op reshape consumers with empty lists
+                        # (those that have no consumers, or null-consumers used as list terminators)
+                        # must use normal NHWC output.
+                        incompatible_consumers = [ (not consumer.run_on_npu or consumer.type == "Reshape") for consumer in op.outputs[0].consumer_list
+                            if consumer is not None ]
+                        if (outshape == inshape) and incompatible_consumers and not any(incompatible_consumers):
+                            rewrites.append(op)
                         else:
-                            use_NHCWB16 &= op.run_on_npu
+                            use_NHCWB16 = False
+                    else:
+                        use_NHCWB16 &= op.run_on_npu
 
                 if use_NHCWB16:
                     output.set_format(TensorFormat.NHCWB16, arch)
+                    for rewrite_op in rewrites:
+                        rewrite_op.outputs[0].set_format(TensorFormat.NHCWB16, arch)
 
 
 def schedule_passes(nng, arch, options: SchedulerOptions):
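
To make the consumer check in the hunk above concrete, here is a hedged, self-contained sketch using simplified Tensor/Op stand-ins (hypothetical mocks, not vela's real classes) together with the full_shape sketch after the commit message. The rule it mirrors: a no-op reshape may stay in NHCWB16 only if it has at least one consumer and every non-null consumer runs on the NPU and is not itself a Reshape; an empty consumer list (or only null list-terminator entries) forces plain NHWC.

# Hypothetical mocks for illustration; not vela's real Tensor/Op classes.
class Tensor:
    def __init__(self, shape, consumer_list=None):
        self.shape = shape
        self.consumer_list = consumer_list if consumer_list is not None else []

class Op:
    def __init__(self, op_type, run_on_npu, inputs=(), outputs=()):
        self.type = op_type
        self.run_on_npu = run_on_npu
        self.inputs = list(inputs)
        self.outputs = list(outputs)

def reshape_can_use_nhcwb16(op):
    # Mirrors the scheduler hunk: the reshape must be a no-op AND every real
    # consumer of its output must run on the NPU without reshaping again.
    inshape = full_shape(4, op.inputs[0].shape, 1)
    outshape = full_shape(4, op.outputs[0].shape, 1)
    incompatible = [
        (not c.run_on_npu or c.type == "Reshape")
        for c in op.outputs[0].consumer_list
        if c is not None
    ]
    return inshape == outshape and bool(incompatible) and not any(incompatible)

conv = Op("Conv2D", run_on_npu=True)
ofm = Tensor([8, 8, 16], consumer_list=[conv])
reshape = Op("Reshape", run_on_npu=True, inputs=[Tensor([1, 8, 8, 16])], outputs=[ofm])
print(reshape_can_use_nhcwb16(reshape))  # True: no-op reshape feeding an NPU op

Note the asymmetry in the diff: qualifying reshapes are only collected in `rewrites`, and their output tensors are switched to NHCWB16 after the producing pass's output format is confirmed, inside the final `if use_NHCWB16:` block.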