diff options
-rw-r--r-- | ethosu/vela/architecture_features.py | 9 | ||||
-rw-r--r-- | ethosu/vela/shared_buffer_allocation.py | 3 | ||||
-rw-r--r-- | ethosu/vela/tensor.py | 5 | ||||
-rw-r--r-- | ethosu/vela/tflite_reader.py | 6 |
4 files changed, 17 insertions, 6 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py index 1bf9d950..b59122ea 100644 --- a/ethosu/vela/architecture_features.py +++ b/ethosu/vela/architecture_features.py @@ -22,6 +22,7 @@ from configparser import ConfigParser import numpy as np from .errors import OptionError +from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .numeric_util import round_up from .numeric_util import round_up_divide from .operation import NpuBlockType @@ -347,10 +348,10 @@ Note the difference between ArchitectureFeatures and CompilerOptions return min(max_block_depth, ifm_depth) # Calculate the size of the IFM block given a depth, target OFM block and a kernel - def get_ifm_block_size( - self, ifm_block_depth, ofm_block: Block, kernel: Kernel, subkernel: Block = Block(8, 8, 65536) - ): - upscaling = 1 + def get_ifm_block_size(self, ifm_block_depth, ofm_block: Block, + kernel: Kernel, subkernel: Block = Block(8, 8, 65536), + ifm_resampling_mode=resampling_mode.NONE): + upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2 # Height ifm_odd_2x_height_enable = 0 dilated_kernel_height = ((kernel.height - 1) * kernel.dilation.y) + 1 diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py index 2bfe5941..72caa1b6 100644 --- a/ethosu/vela/shared_buffer_allocation.py +++ b/ethosu/vela/shared_buffer_allocation.py @@ -85,6 +85,7 @@ class SharedBufferAllocation: else: assert self.ifm_bits == 8, "Unexpected IFM bitdepth" + self.ifm_resampling_mode = ifm_tensor.resampling_mode self.ifm_block_depth = arch.calc_ifm_block_depth(self.ifm_depth, self.ifm_bits) self.ofm_tensor = ofm_tensor @@ -105,7 +106,7 @@ class SharedBufferAllocation: def try_block(self, ofm_block: Block): # Get IFM block configuration ifm_block_depth = ofm_block.depth if self.is_equal_depth_op else self.ifm_block_depth - ifm_block = self.arch.get_ifm_block_size(ifm_block_depth, ofm_block, self.kernel) + ifm_block = self.arch.get_ifm_block_size(ifm_block_depth, ofm_block, self.kernel, ifm_resampling_mode=self.ifm_resampling_mode) ifm_config = self.arch.get_block_config(ifm_block.width, ifm_block.height, ifm_block.depth) if ifm_config is None: return None diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index 19258b52..160cf630 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -21,6 +21,7 @@ import uuid import numpy as np from . import numeric_util +from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .numeric_util import round_up_divide from .range_set import MemoryRangeSet @@ -230,6 +231,7 @@ class Tensor: "cpu_tensor", "npu_tensor", "equivalence_id", + "resampling_mode", ) AllocationQuantum = 16 @@ -274,6 +276,7 @@ class Tensor: self.reshaped = False self.block_traversal = TensorBlockTraversal.Default + self.resampling_mode = resampling_mode.NONE def element_size(self): if self.element_size_bytes == 0: @@ -312,6 +315,8 @@ class Tensor: else: res.quantization = None + res.resampling_mode = self.resampling_mode + return res def clone_into_fast_storage(self, arch): diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py index 850690f2..4ee39634 100644 --- a/ethosu/vela/tflite_reader.py +++ b/ethosu/vela/tflite_reader.py @@ -20,6 +20,7 @@ import os.path import numpy as np from .errors import UnsupportedFeatureError +from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .nn_graph import Graph from .nn_graph import Subgraph from .operation import Operation @@ -146,7 +147,8 @@ class TFLiteSubgraph: op.attrs = opt_serializer.deserialize(op_data.BuiltinOptions(), op_data.CustomOptionsAsNumpy()) if op_type.startswith("ResizeBilinear"): - upscaled_shape = [op.inputs[0].shape[1] * 2, op.inputs[0].shape[2] * 2] + input_tensor = op.inputs[0] + upscaled_shape = [input_tensor.shape[1] * 2, input_tensor.shape[2] * 2] out_shape = op.outputs[0].shape[1:3] if not op.attrs["align_corners"] and out_shape == upscaled_shape: # this means the output is supposed to be a x2 upscale, @@ -160,6 +162,8 @@ class TFLiteSubgraph: raise UnsupportedFeatureError("ResizeBilinear: Only 2x upscaling is supported") op.attrs.update({"filter_width": 2, "filter_height": 2, "stride_w": 1, "stride_h": 1}) + input_tensor.resampling_mode = resampling_mode.NEAREST + if "stride_w" in op.attrs: op.attrs["strides"] = (1, op.attrs["stride_h"], op.attrs["stride_w"], 1) if "filter_width" in op.attrs: |