about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jonas Ohlsson <jonas.ohlsson@arm.com> 2022-03-01 12:39:55 +0100
committer Jonas Ohlsson <jonas.ohlsson@arm.com> 2022-03-21 11:09:39 +0100
commit 845e23200d471e44f274940846e400d170b5ff37 (patch)
tree 28a01492bf11f0ff69309ead9bd8a1bad9e14cbb
parent d2b5510697e7789f5a416f9d80d3cb640eecc092 (diff)
download ethos-u-vela-845e23200d471e44f274940846e400d170b5ff37.tar.gz
MLBEDSW-3367 Add mypy to pre-commit
Add mypy to pre-commit and clean up all reported errors.

Signed-off-by: Jonas Ohlsson <jonas.ohlsson@arm.com>
Change-Id: If7dc869f5fecdb0e2db40f14e7d9db21aa33df71
-rw-r--r-- .pre-commit-config.yaml 9
-rw-r--r-- ethosu/mlw_codec/test/test_mlw_codec.py 5
-rw-r--r-- ethosu/vela/architecture_allocator.py 36
-rw-r--r-- ethosu/vela/high_level_command_stream.py 5
-rw-r--r-- ethosu/vela/high_level_command_to_npu_op.py 11
-rw-r--r-- ethosu/vela/hillclimb_allocation.py 6
-rw-r--r-- ethosu/vela/nn_graph.py 4
-rw-r--r-- ethosu/vela/npu_performance.py 3
-rw-r--r-- ethosu/vela/operation.py 19
-rw-r--r-- ethosu/vela/operation_util.py 2
-rw-r--r-- ethosu/vela/register_command_stream_generator.py 9
-rw-r--r-- ethosu/vela/register_command_stream_util.py 6
-rw-r--r-- ethosu/vela/scheduler.py 35
-rw-r--r-- ethosu/vela/tensor.py 2
-rw-r--r-- ethosu/vela/tensor_allocation.py 2
-rw-r--r-- ethosu/vela/weight_compressor.py 6
-rw-r--r-- setup.py 2
17 files changed, 102 insertions, 60 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8e976b6..ae2bae5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,14 @@
exclude: '^ethosu/vela/(tflite|ethos_u55_regs|tosa)/'
repos:
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: 'v0.931'
+ hooks:
+ - id: mypy
+ args: ["--no-strict-optional", "--show-error-codes", "--ignore-missing-imports"]
+ require_serial: true
+ additional_dependencies: [types-setuptools]
+ minimum_pre_commit_version: '2.9.2'
+
- repo: https://github.com/asottile/reorder_python_imports
rev: v2.2.0
hooks:
diff --git a/ethosu/mlw_codec/test/test_mlw_codec.py b/ethosu/mlw_codec/test/test_mlw_codec.py
index 18c828a..3ff26e5 100644
--- a/ethosu/mlw_codec/test/test_mlw_codec.py
+++ b/ethosu/mlw_codec/test/test_mlw_codec.py
@@ -15,6 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Simple example of the usage of mlw_codec.
+from typing import Any
+from typing import List
+
import pytest
from ethosu import mlw_codec
@@ -68,7 +71,7 @@ class TestMLWCodec:
with pytest.raises(Exception):
mlw_codec.encode(input)
- invalid_decode_test_data = [None, 3, []]
+ invalid_decode_test_data: List[Any] = [None, 3, []]
@pytest.mark.parametrize("input", invalid_decode_test_data)
def test_decode_invalid_input(self, input):
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index 84d8354..d27f126 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -17,8 +17,10 @@
# Description: Architecture SHRAM allocator
import enum
import math
+from typing import Dict
from typing import Optional
from typing import Tuple
+from typing import Union
from .architecture_features import ArchitectureFeatures
from .architecture_features import Block
@@ -77,8 +79,8 @@ class ElementwiseUsage(enum.IntEnum):
def _try_block_config(
shram: SHRAMConfig,
ew_usage: ElementwiseUsage,
- ofm_block: Block,
- ifm_block: Block,
+ ofm_block: Union[Shape4D, Block],
+ ifm_block: Union[Shape4D, Block],
ifm_bits: int,
ifm_granule: int,
acc_bits: int,
@@ -86,7 +88,7 @@ def _try_block_config(
lut_banks: int,
ifm_depth_buf_scaling: int,
cores: int,
-) -> SHRAMLayout:
+) -> Union[SHRAMLayout, None]:
assert (acc_bits > 0) and (acc_granule > 0)
assert (ifm_bits >= 8) and ((ifm_bits % 8) == 0) and (ifm_granule > 0)
@@ -173,7 +175,7 @@ def to_upscale(ifm_resampling: resampling_mode) -> int:
return 1 if ifm_resampling == resampling_mode.NONE else 2
-def _ifm_blockdepth(arch, ifm_shape: Shape4D, ifm_bits: int, is_partkernel: bool):
+def _ifm_blockdepth(arch, ifm_shape: Union[Shape4D, Block], ifm_bits: int, is_partkernel: bool):
if ifm_bits == 16:
ifm_blockdepth = round_up(min(ifm_shape.depth, 16), 4)
else:
@@ -185,7 +187,9 @@ def _required_size(value: int, stride: int, border: int, upscale: int, nearest:
return int(math.ceil(((value - 1) * stride + border + nearest) / upscale))
-def get_ifm_area_required(ofm_shape: Shape4D, kernel: Kernel, resampling_mode: resampling_mode) -> Tuple[int, int]:
+def get_ifm_area_required(
+ ofm_shape: Union[Shape4D, Block], kernel: Kernel, resampling_mode: resampling_mode
+) -> Tuple[int, int]:
upscale = to_upscale(resampling_mode)
nearest = is_nearest(resampling_mode)
h1 = _required_size(ofm_shape.height, kernel.stride.y, kernel.area_height(), upscale, nearest)
@@ -194,7 +198,7 @@ def get_ifm_area_required(ofm_shape: Shape4D, kernel: Kernel, resampling_mode: r
def _get_ifm_blocksize(
- ofm_block: Shape4D, kernel: Kernel, ublock: Block, subkernel_limit: Block, upscale: int, nearest: bool
+ ofm_block: Union[Shape4D, Block], kernel: Kernel, ublock: Block, subkernel_limit: Block, upscale: int, nearest: bool
) -> Shape4D:
# IFM block height
h1 = _required_size(
@@ -213,7 +217,9 @@ def _get_ifm_blocksize(
return Shape4D(1, height, width, ofm_block.depth)
-def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D):
+def fit_block_for_ofm(
+ arch: ArchitectureFeatures, ofm_shape: Union[Shape4D, Block], kernel: Kernel, block: Union[Shape4D, Block]
+):
# 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes) This is a specific
# interpretation of a more general constraint that can't be applied because the
# find_block_config function must return block configs that can be applied to any OFM shape.
@@ -227,14 +233,14 @@ def find_block_config(
npu_op_type: NpuBlockType,
ofm_shape: Shape4D,
ifm_shape: Shape4D,
- ifm2_shape: Shape4D,
+ ifm2_shape: Optional[Shape4D],
uses_scalar: bool,
ifm_bits: int,
kernel: Kernel,
lut_banks: int,
scaled: bool,
ifm_resampling: resampling_mode,
-) -> ArchitectureBlockConfig:
+) -> Optional[ArchitectureBlockConfig]:
SplitDepth = ArchitectureFeatures.OFMSplitDepth
# Elementwise larger-volume correction
if ifm2_shape is not None and ifm2_shape.elements() > ifm_shape.elements():
@@ -296,7 +302,7 @@ def find_block_config(
depth = round_up(depth, SplitDepth)
while depth <= search_space.depth:
- wont_fit = {}
+ wont_fit: Dict[Tuple[int, int], bool] = {}
for height in range(arch.ofm_ublock.height, search_space.height + 1, arch.ofm_ublock.height):
for width in range(arch.ofm_ublock.width, search_space.width + 1, arch.ofm_ublock.width):
# Avoid checking W/H transposed blocks that already didn't fit. i.e. if 8x4x16 didn't
@@ -315,8 +321,8 @@ def find_block_config(
layout = _try_block_config(
arch.shram,
ew_usage,
- ofm_block,
- ifm_block,
+ Block(ofm_block.width, ofm_block.height, ofm_block.depth),
+ Block(ifm_block.width, ifm_block.height, ifm_block.depth),
ifm_bits,
ifm_granule,
acc_bits,
@@ -385,9 +391,9 @@ def try_block_config(
block_config: Block,
arch: ArchitectureFeatures,
npu_op_type: NpuBlockType,
- ofm_shape: Block,
- ifm_shape: Block,
- ifm2_shape: Optional[Block],
+ ofm_shape: Union[Shape4D, Block],
+ ifm_shape: Union[Shape4D, Block],
+ ifm2_shape: Optional[Union[Shape4D, Block]],
uses_scalar: bool,
ifm_bits: int,
is_partkernel: bool,
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 7e60221..0009f6c 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -16,6 +16,7 @@
# Description:
# Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe).
from typing import List
+from typing import Optional
import numpy as np
@@ -41,8 +42,8 @@ class Box:
npu_block_type: NpuBlockType,
concat_offsets: List[int],
k_dilated_height: int,
- split_offset: Shape4D = None,
- split_shape: Shape4D = None,
+ split_offset: Optional[Shape4D] = None,
+ split_shape: Optional[Shape4D] = None,
upscaling_factor: int = 1,
):
new_start_coord = list(self.start_coord)
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 6c403c8..f7c91aa 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -17,9 +17,11 @@
# Description:
# Conversion from high level command to NpuOperation
from enum import IntEnum
+from typing import cast
from typing import Dict
from typing import List
from typing import Optional
+from typing import Tuple
from .api import NpuActivation
from .api import NpuActivationOp
@@ -66,6 +68,7 @@ from .tensor import Tensor
from .tensor import TensorFormat
from .tensor import TensorPurpose
from .tensor import TensorSubPurpose
+from .weight_compressor import NpuWeightTensor
from .weight_compressor import WeightKey
@@ -294,17 +297,17 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh
def create_weights(
- weight_tensor: Tensor, weight_box: Box, scale_tensor: Tensor, arch: ArchitectureFeatures
-) -> List[NpuAddressRange]:
+ weight_tensor: NpuWeightTensor, weight_box: Box, scale_tensor: NpuWeightTensor, arch: ArchitectureFeatures
+) -> Tuple[List[NpuAddressRange], List[NpuAddressRange]]:
"""Returns address ranges for weights and scales"""
weights = []
biases = []
shared_region = get_region(weight_tensor.mem_type, arch)
- scale_region = scale_tensor and get_region(scale_tensor.mem_type, arch)
+ scale_region = get_region(scale_tensor.mem_type, arch) if scale_tensor else 0
w_tensor_src = weight_tensor
if weight_tensor.src_tensor:
- w_tensor_src = weight_tensor.src_tensor
+ w_tensor_src = cast(NpuWeightTensor, weight_tensor.src_tensor)
core_offset = 0
for core in range(0, arch.ncores):
diff --git a/ethosu/vela/hillclimb_allocation.py b/ethosu/vela/hillclimb_allocation.py
index 5e02dac..2271fe9 100644
--- a/ethosu/vela/hillclimb_allocation.py
+++ b/ethosu/vela/hillclimb_allocation.py
@@ -101,7 +101,7 @@ class HillClimbAllocator:
LiveRangeInfo(id, lr.start_time, lr.end_time, lr.size, lr.get_alignment())
for id, lr in enumerate(live_ranges)
]
- self.lrs_at_time = []
+ self.lrs_at_time: List[List[LiveRangeInfo]] = []
# The available size (input to algorithm).
self.available_size: int = 0
# The algorithm stops once the target size has been achieved
@@ -227,8 +227,8 @@ class HillClimbAllocator:
# - direct neighbours of the bottleneck live range
# - direct and indirect predecessors of these neighbours + bottleneck
# The turns at which these live ranges were allocated are put in the turns set.
- turn_set = set()
- turn_list = list()
+ turn_set: Set[int] = set()
+ turn_list: List[int] = list()
self.add_predecessor_turns(turn_set, turn_list, max_lr)
for lr2 in max_lr.neighbours:
self.add_predecessor_turns(turn_set, turn_list, lr2)
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 8a2517d..671843f 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -130,6 +130,7 @@ class CascadedPass:
self.predecessors = []
self.successors = []
self.sram_used = 0
+ self.time = 0
def __str__(self):
return "<nng.CascadedPass strategy=%s x %s '%s', passes=%s, block_configs=%s>" % (
@@ -537,6 +538,9 @@ class Graph:
self.total_npu_weights = 0
self.total_npu_encoded_weights = 0
self.weight_cache = None # See CompressedWeightCache
+ self.bandwidths = 0
+ self.macs = 0
+ self.cycles = 0
def get_root_subgraph(self):
return self.subgraphs[0]
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 34530ae..8c4aee6 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -22,6 +22,7 @@
import copy
from enum import auto
from enum import IntEnum
+from typing import Optional
from typing import Set
from uuid import UUID
@@ -580,7 +581,7 @@ def update_summary_cycles(arch, bws, cycles):
def estimate_full_op_performance(
- arch, schedule: Schedule, op: SchedulerOperation, prev_op: SchedulerOperation, block_config
+ arch, schedule: Schedule, op: SchedulerOperation, prev_op: Optional[SchedulerOperation], block_config
):
cycles_a = make_cycles_array()
bws = make_bandwidth_array()
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 277f2de..5a6423d 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -15,6 +15,9 @@
# limitations under the License.
# Description:
# Internal representation of a Neural Network Operation.
+# For Class name forward references for the type annotations. (see PEP 563).
+from __future__ import annotations
+
import copy
from collections import namedtuple
from enum import Enum
@@ -24,13 +27,14 @@ from typing import List
from typing import Optional
from typing import Tuple
from typing import TYPE_CHECKING
+from typing import Union
from .api import NpuRoundingMode
from .errors import VelaError
from .numeric_util import full_shape
from .shape4d import Shape4D
-
+# Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import.
if TYPE_CHECKING:
from .tensor import Tensor
@@ -80,9 +84,6 @@ class Kernel:
def area_height(self) -> int:
return (self.height - 1) * self.dilation.y + 1
- def dilation(self) -> PointXY:
- return self.dilation
-
def dilated_wh(self) -> Tuple[int, int]:
"""Returns the dilated kernel width/height"""
return self.dilation.x * (self.width - 1) + 1, self.dilation.y * (self.height - 1) + 1
@@ -443,7 +444,7 @@ def create_activation_function(op_type: Op, min=None, max=None) -> ActivationFun
return act
-def get_slice_offsets(input_shape: List[int], offset_tens: int, offset_mask: int, is_begin: bool = True):
+def get_slice_offsets(input_shape: List[int], offset_tens: Tensor, offset_mask: int, is_begin: bool = True):
# For strided slice operator: get start or end offsets
offsets = len(input_shape) * [0] if is_begin else input_shape[:]
for idx in range(len(input_shape)):
@@ -493,7 +494,7 @@ class Operation:
self.type = op_type
self.name = name
self.attrs: Dict[str, Any] = {}
- self.inputs: List[Tensor] = []
+ self.inputs: List[Optional[Tensor]] = []
self.outputs: List[Tensor] = []
self.intermediates: List[Tensor] = []
self.flops = 0
@@ -514,9 +515,9 @@ class Operation:
self.ofm_shapes: List[Shape4D] = []
# If not none: contains rescale to be used as output scaling
# (which overrides the ofm tensor's scale)
- self.rescale = None
- self.read_offsets: List[Shape4D] = [None, None] # offset for [ifm, ifm2]
- self.read_shapes: List[Shape4D] = [None, None] # read shape for [ifm, ifm2]
+ self.rescale: Optional[Union[Tuple[int, int], ExplicitScaling]] = None
+ self.read_offsets: List[Optional[Shape4D]] = [None, None] # offset for [ifm, ifm2]
+ self.read_shapes: List[Optional[Shape4D]] = [None, None] # read shape for [ifm, ifm2]
self.rounding_mode: Optional[NpuRoundingMode] = None
# Rescale op in TOSA supplies explicit multiplier and shift values
self.explicit_scaling: Optional[ExplicitScaling] = None
diff --git a/ethosu/vela/operation_util.py b/ethosu/vela/operation_util.py
index 29caf6d..36a8e59 100644
--- a/ethosu/vela/operation_util.py
+++ b/ethosu/vela/operation_util.py
@@ -234,7 +234,7 @@ def create_binary_elementwise(
op_type: Op,
name: str,
ifm: Tensor,
- ifm2: Tensor,
+ ifm2: Optional[Tensor],
quantization: QuantizationParameters,
activation: Optional[ActivationFunction] = None,
dtype: Optional[DataType] = None,
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index fd32b65..3be2898 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -21,6 +21,7 @@ import math
from collections import defaultdict
from enum import Enum
from enum import IntEnum
+from typing import cast
from typing import Dict
from typing import List
from typing import Optional
@@ -319,7 +320,7 @@ def generate_activation(emit: CommandStreamEmitter, activation: Optional[NpuActi
quantized_min = max(-128, quantized_min)
quantized_max = min(127, quantized_max)
else:
- activation_value = activation_op_map[act.op_type]
+ activation_value = cast(int, activation_op_map[act.op_type])
emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation_value)
emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MIN, quantized_min)
emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MAX, quantized_max)
@@ -584,7 +585,7 @@ def get_arch_block_config(
block_config,
arch,
block_type,
- npu_op.ofm.shape,
+ shape3d_to_block(npu_op.ofm.shape),
ifm_shape,
ifm2_shape,
uses_scalar,
@@ -741,6 +742,8 @@ def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElem
ofm_scale, shift = scaling.elementwise_mul_scale(input_scale, input2_scale, output_scale)
emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
else: # Add/Sub
+ opa_scale: float
+ opb_scale: float
bitdepth = npu_op.ifm.data_type.size_in_bits()
use_advanced_scaling = False
if None in (input_scale, input2_scale, output_scale):
@@ -799,7 +802,7 @@ def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElem
# -------------------------------------------------------------------
-def print_feature_map(fm: NpuFeatureMap, name: str):
+def print_feature_map(fm: Optional[NpuFeatureMap], name: str):
if fm is not None:
q = (
"no quantization"
diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py
index 3751d88..83126ea 100644
--- a/ethosu/vela/register_command_stream_util.py
+++ b/ethosu/vela/register_command_stream_util.py
@@ -163,7 +163,7 @@ def get_h_ranges(
return [get_address_range(fm, strides, y, x0, c0, y, x1, c1) for y in range(y0, y1 + 1)]
-def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXYZ) -> List[NpuAddressRange]:
+def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXYZ) -> List[Optional[NpuAddressRange]]:
"""
Returns a list of adddress ranges that covers the area start - end (inclusive).
Divides the area in horizontal "stripes" of height 1, and returns the address ranges for these "stripes".
@@ -183,7 +183,7 @@ def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXY
h, w, c = fm.shape
y0, x0, c0 = start.y, start.x, start.z
y1, x1, c1 = min(end.y, h - 1), min(end.x, w - 1), min(end.z, c - 1)
- ranges = []
+ ranges: List[Optional[NpuAddressRange]] = []
if x0 < width_0 and y0 < height_0:
# Horizontal ranges for tile 0
ranges.extend(get_h_ranges(fm, strides, y0, x0, c0, min(y1, height_0 - 1), min(x1, width_0 - 1), c1))
@@ -373,7 +373,7 @@ def intersects(
else:
# The OFM produces a part of the IFM (e.g. a stripe), or the IFM consumes part of the OFM.
# In this case, address comparison between the two areas is needed
- ifm_ranges = get_address_ranges_for_area(ifm, ifm_start_coord, ifm_end_coord)
+ ifm_ranges: List[Optional[NpuAddressRange]] = get_address_ranges_for_area(ifm, ifm_start_coord, ifm_end_coord)
prev_ofm_ranges = get_address_ranges_for_area(prev_ofm, ofm_start_coord, ofm_end_coord)
res = range_lists_overlap(ifm_ranges, prev_ofm_ranges)
return res
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 284848f..73133bc 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -17,6 +17,9 @@
# Description:
# The scheduler creates and searches for an optimal plan for the network, selecting block configurations and
# subdivisions for the Operators
+# For Class name forward references for the type annotations. (see PEP 563).
+from __future__ import annotations
+
import copy
from collections import namedtuple
from enum import auto
@@ -25,6 +28,11 @@ from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
+from typing import TYPE_CHECKING
+
+# Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import.
+if TYPE_CHECKING:
+ from .npu_performance import CycleCost
import numpy as np
@@ -57,6 +65,7 @@ from .tensor import Tensor
from .tensor import TensorFormat
from .tensor import TensorPurpose
from .tensor import TensorSubPurpose
+from .weight_compressor import NpuWeightTensor
def shape_for_format(shape: Shape4D, tensor_format: TensorFormat) -> Shape4D:
@@ -95,10 +104,10 @@ class SchedulerOpInfo:
self.cascade = 0 # Assigned by CascadeBuilder. 0 means not part of a cascade
self.time_index = None # Set by update_op_memory_snapshot
self.ofm_depth_slices: List[int] = [0, stripe.depth]
- self.npu_weights_tensor = None
- self.npu_scales_tensor = None
- self.buffered_weight_tensor = None
- self.cycles = None
+ self.npu_weights_tensor: Optional[NpuWeightTensor] = None
+ self.npu_scales_tensor: Optional[NpuWeightTensor] = None
+ self.buffered_weight_tensor: Optional[Tensor] = None
+ self.cycles: Optional[CycleCost] = None
self.slack_buffering_cycles = 0
self.slack_buffering_memory = 0
self.full_weight_transfer_cycles = 0
@@ -230,7 +239,7 @@ class SchedulerOperation:
def create_scheduler_info(self, nng: Graph, stripe: Shape4D) -> SchedulerOpInfo:
"""Returns schedule info about this SchedulerOperation based on how many ofm elements it should produce"""
ifm_shape = self.ifm.shape
- ifm2_shape = self.ifm2 and self.ifm2.shape
+ ifm2_shape = self.ifm2.shape if self.ifm2 is not None else None
ofm_shape = stripe
if ofm_shape != self.ofm.shape:
@@ -273,14 +282,14 @@ class SchedulerOperation:
return get_ifm_area_required(ofm_shape_to_produce, self.kernel, self.resampling_mode)
- def _calculate_min_stripe_input(self) -> Shape4D:
+ def _calculate_min_stripe_input(self) -> Tuple[int, int]:
# Calculate the input volume required height and width for the smallest possible stripe (h,w = 1,1)
min_stripe = self.ofm.shape.with_hw(1, 1)
return self._get_stripe_input_requirement(min_stripe)
def _get_block_config(
self, ifm_shape: Shape4D, ifm2_shape: Optional[Shape4D], uses_scalar: bool, ofm_shape: Shape4D
- ) -> ArchitectureBlockConfig:
+ ) -> Optional[ArchitectureBlockConfig]:
# Returns a block config and SHRAM layout
lut_banks = 2 if self.parent_op.activation_lut else 0
return find_block_config(
@@ -325,7 +334,7 @@ class Schedule:
self.cost_map: Dict[SchedulerOperation, SchedulerOpInfo] = {}
self.cascades: Dict[int, CascadeInfo] = {}
self.fast_storage_peak_usage = 0
- self.memory_snapshot = None
+ self.memory_snapshot: Optional[List[int]] = None
@property
def name(self):
@@ -340,7 +349,7 @@ class Scheduler:
self.sg = sg
self.arch = arch
self.sched_ops: List[SchedulerOperation] = []
- self.max_schedule = None
+ self.max_schedule: Optional[Schedule] = None
self.scheduler_options = options
def avoid_nhcwb16_for_ofm(self, tens, ps, arch):
@@ -524,7 +533,7 @@ class Scheduler:
def propose_operator_buffering(
self,
sched_op: SchedulerOperation,
- prev_op: SchedulerOperation,
+ prev_op: Optional[SchedulerOperation],
buffered_schedule: Schedule,
ref_schedule: Schedule,
staging_limit_bytes,
@@ -605,7 +614,7 @@ class Scheduler:
cost.npu_scales_tensor = full_scales
return
- encoded_weights = full_weights
+ encoded_weights: Optional[NpuWeightTensor] = full_weights
encoded_scales = full_scales
# How many NPU cycles are available under the previously executing
@@ -681,7 +690,7 @@ class Scheduler:
cost.block_config,
cost.ofm_depth_slices,
)
-
+ assert encoded_weights is not None
# Chosen buffering might not fit at all, iterate until it does
# or until the minimum usable slice size is reached
if (
@@ -747,7 +756,7 @@ class Scheduler:
cost_map = min_schedule.cost_map
# Keep track of the previous Op - which consumes the current Op's OFM
- prev_op = None
+ prev_op: Optional[SchedulerOperation] = None
for sched_op in reversed(self.sched_ops):
min_stripe_height = prev_op.kernel.stride.y if prev_op else 1
min_stripe = sched_op.ofm.shape.with_height(min_stripe_height)
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 82de897..19016a0 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -420,7 +420,7 @@ class Tensor:
self.ifm_write_protected = False
# Reference to parent-tensor if this tensor is a clone
- self.src_tensor = None
+ self.src_tensor: Optional[Tensor] = None
@property
def address(self) -> int:
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index c82140c..c8b5129 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -91,7 +91,7 @@ def verify_allocation(live_ranges: LiveRangeGraph, alignment: int):
verify_alignment(live_ranges, alignment)
nr_time_slots = 1 + max(lr.end_time for lr in live_ranges.lrs)
# Contains active live ranges at each timestamp
- lrs_at_time = [[] for i in range(nr_time_slots)]
+ lrs_at_time: List[List[LiveRange]] = [[] for i in range(nr_time_slots)]
for lr in live_ranges.lrs:
for t in range(lr.start_time, lr.end_time + 1):
lrs_at_time[t].append(lr)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 6881703..22fe512 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -17,6 +17,8 @@
# Compresses and pads the weigths. It also calculates the scales and packs with the biases.
from collections import namedtuple
from collections import OrderedDict
+from typing import Dict
+from typing import Optional
from typing import Tuple
import numpy as np
@@ -75,7 +77,7 @@ class NpuWeightTensor(Tensor):
class CompressedWeightCache:
"""Global tensor weight compression cache"""
- cache = {}
+ cache: Dict[WeightCompressionConfig, Tensor] = {}
@staticmethod
def get_tensor_with_same_compression(wcc):
@@ -279,7 +281,7 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
def encode_weight_and_scale_tensor(
arch, op, weight_tens, scale_tens, kernel, block_config, depth_offsets, rescale_for_faf=False
-) -> (NpuWeightTensor, NpuWeightTensor):
+) -> Tuple[Optional[NpuWeightTensor], Optional[NpuWeightTensor]]:
npu_block_type = op.type.npu_block_type
ifm_scale = scale_tens and scale_tens.consumer_list[0].get_input_quantization().scale_f32
diff --git a/setup.py b/setup.py
index 488d296..031401e 100644
--- a/setup.py
+++ b/setup.py
@@ -98,6 +98,6 @@ setup(
],
entry_points={"console_scripts": ["vela = ethosu.vela.vela:main"]},
ext_modules=[mlw_module],
- cmdclass={"build_ext": BuildExtension},
+ cmdclass={"build_ext": BuildExtension}, # type: ignore[dict-item]
setup_requires=["numpy>=1.16.6,<=1.19.5", "setuptools_scm"],
)