diff options
author | Johan Alfven <johan.alfven@arm.com> | 2023-10-28 16:04:46 +0200 |
---|---|---|
committer | Johan Alfven <johan.alfven@arm.com> | 2023-11-09 11:59:58 +0100 |
commit | a8fda88bced0d11441467b6798885101d41ac8e9 (patch) | |
tree | 807de7fa4eee48720255fbed4a605218f8612f6a /ethosu/vela/pass_packing.py | |
parent | 4bf0cdf58416edc030ae7507ace95224785e4aa8 (diff) | |
download | ethos-u-vela-a8fda88bced0d11441467b6798885101d41ac8e9.tar.gz |
MLBEDSW-8290: MLCE: Add TRANSPOSE support (tag: 3.10.0.rc1)
- Added graph optimiser function to convert TRANSPOSE op
into an AvgPool op with swapped stride for height and width
- Added TRANSPOSE supported op check
- Added unit tests for TRANSPOSE supported op check
- Updated SUPPORTED_OPS.md
- Fixed problem in pass packing when optimizing the pass list.
Old problem, but now seen when moving TRANSPOSE from cpu.
Change-Id: I0a0ef420b0fb8241090c2e2434622881105cde15
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/pass_packing.py')
-rw-r--r-- | ethosu/vela/pass_packing.py | 11 |
1 file changed, 9 insertions, 2 deletions
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 4c733cce..0de0341d 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -481,7 +481,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
         # Try to optmize this by moving/grouping CPU ops where that is possible.
         # Criteria for CPU pass to be moved:
         #
-        # 1) CPU passes that only depends on sg.input_tensor can be
+        # 1) CPU passes that only depends on sg.input_tensors can be
         #    moved to the top of the list.
         #    ResourceVariables ops like VarHandle, ReadVariable, CallOnce
         #    can also be moved to the top of list.
@@ -503,9 +503,16 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                 pass_list_top.insert(0, ps)
                 continue
 
+            ifm2 = ps.ops[0].ifm2
+            if ifm2 is None:
+                # Dynamic weights must be treated as ifm's.
+                if ps.ops[0].type == Op.FullyConnected and ps.ops[0].weights.purpose == TensorPurpose.FeatureMap:
+                    # Op has dynamic weights, include this in the check below
+                    ifm2 = ps.ops[0].weights
+
             if ps.placement == PassPlacement.Cpu and (
                 ps.ops[0].ifm in sg.input_tensors
-                and (ps.ops[0].ifm2 in sg.input_tensors or ps.ops[0].ifm2 is None)
+                and (ifm2 in sg.input_tensors or ifm2 is None)
                 or (ps.ops[0].type in (Op.VarHandle, Op.ReadVariable, Op.CallOnce))
             ):
                 # This CPU pass only depends on sg.input_tensors or resource variable