From 845e23200d471e44f274940846e400d170b5ff37 Mon Sep 17 00:00:00 2001 From: Jonas Ohlsson Date: Tue, 1 Mar 2022 12:39:55 +0100 Subject: MLBEDSW-3367 Add mypy to pre-commit Add mypy to pre-commit and clean up all reported errors. Signed-off-by: Jonas Ohlsson Change-Id: If7dc869f5fecdb0e2db40f14e7d9db21aa33df71 --- .pre-commit-config.yaml | 9 ++++++ ethosu/mlw_codec/test/test_mlw_codec.py | 5 +++- ethosu/vela/architecture_allocator.py | 36 ++++++++++++++---------- ethosu/vela/high_level_command_stream.py | 5 ++-- ethosu/vela/high_level_command_to_npu_op.py | 11 +++++--- ethosu/vela/hillclimb_allocation.py | 6 ++-- ethosu/vela/nn_graph.py | 4 +++ ethosu/vela/npu_performance.py | 3 +- ethosu/vela/operation.py | 19 +++++++------ ethosu/vela/operation_util.py | 2 +- ethosu/vela/register_command_stream_generator.py | 9 ++++-- ethosu/vela/register_command_stream_util.py | 6 ++-- ethosu/vela/scheduler.py | 35 ++++++++++++++--------- ethosu/vela/tensor.py | 2 +- ethosu/vela/tensor_allocation.py | 2 +- ethosu/vela/weight_compressor.py | 6 ++-- setup.py | 2 +- 17 files changed, 102 insertions(+), 60 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e976b65..ae2bae58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,14 @@ exclude: '^ethosu/vela/(tflite|ethos_u55_regs|tosa)/' repos: +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.931' + hooks: + - id: mypy + args: ["--no-strict-optional", "--show-error-codes", "--ignore-missing-imports"] + require_serial: true + additional_dependencies: [types-setuptools] + minimum_pre_commit_version: '2.9.2' + - repo: https://github.com/asottile/reorder_python_imports rev: v2.2.0 hooks: diff --git a/ethosu/mlw_codec/test/test_mlw_codec.py b/ethosu/mlw_codec/test/test_mlw_codec.py index 18c828a3..3ff26e53 100644 --- a/ethosu/mlw_codec/test/test_mlw_codec.py +++ b/ethosu/mlw_codec/test/test_mlw_codec.py @@ -15,6 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # Simple example of the usage of mlw_codec. +from typing import Any +from typing import List + import pytest from ethosu import mlw_codec @@ -68,7 +71,7 @@ class TestMLWCodec: with pytest.raises(Exception): mlw_codec.encode(input) - invalid_decode_test_data = [None, 3, []] + invalid_decode_test_data: List[Any] = [None, 3, []] @pytest.mark.parametrize("input", invalid_decode_test_data) def test_decode_invalid_input(self, input): diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py index 84d8354b..d27f1264 100644 --- a/ethosu/vela/architecture_allocator.py +++ b/ethosu/vela/architecture_allocator.py @@ -17,8 +17,10 @@ # Description: Architecture SHRAM allocator import enum import math +from typing import Dict from typing import Optional from typing import Tuple +from typing import Union from .architecture_features import ArchitectureFeatures from .architecture_features import Block @@ -77,8 +79,8 @@ class ElementwiseUsage(enum.IntEnum): def _try_block_config( shram: SHRAMConfig, ew_usage: ElementwiseUsage, - ofm_block: Block, - ifm_block: Block, + ofm_block: Union[Shape4D, Block], + ifm_block: Union[Shape4D, Block], ifm_bits: int, ifm_granule: int, acc_bits: int, @@ -86,7 +88,7 @@ def _try_block_config( lut_banks: int, ifm_depth_buf_scaling: int, cores: int, -) -> SHRAMLayout: +) -> Union[SHRAMLayout, None]: assert (acc_bits > 0) and (acc_granule > 0) assert (ifm_bits >= 8) and ((ifm_bits % 8) == 0) and (ifm_granule > 0) @@ -173,7 +175,7 @@ def to_upscale(ifm_resampling: resampling_mode) -> int: return 1 if ifm_resampling == resampling_mode.NONE else 2 -def _ifm_blockdepth(arch, ifm_shape: Shape4D, ifm_bits: int, is_partkernel: bool): +def _ifm_blockdepth(arch, ifm_shape: Union[Shape4D, Block], ifm_bits: int, is_partkernel: bool): if ifm_bits == 16: ifm_blockdepth = round_up(min(ifm_shape.depth, 16), 4) else: @@ -185,7 +187,9 @@ def _required_size(value: int, stride: int, border: int, upscale: int, nearest: return int(math.ceil(((value - 1) * stride + border + nearest) / upscale)) -def get_ifm_area_required(ofm_shape: Shape4D, kernel: Kernel, resampling_mode: resampling_mode) -> Tuple[int, int]: +def get_ifm_area_required( + ofm_shape: Union[Shape4D, Block], kernel: Kernel, resampling_mode: resampling_mode +) -> Tuple[int, int]: upscale = to_upscale(resampling_mode) nearest = is_nearest(resampling_mode) h1 = _required_size(ofm_shape.height, kernel.stride.y, kernel.area_height(), upscale, nearest) @@ -194,7 +198,7 @@ def get_ifm_area_required(ofm_shape: Shape4D, kernel: Kernel, resampling_mode: r def _get_ifm_blocksize( - ofm_block: Shape4D, kernel: Kernel, ublock: Block, subkernel_limit: Block, upscale: int, nearest: bool + ofm_block: Union[Shape4D, Block], kernel: Kernel, ublock: Block, subkernel_limit: Block, upscale: int, nearest: bool ) -> Shape4D: # IFM block height h1 = _required_size( @@ -213,7 +217,9 @@ def _get_ifm_blocksize( return Shape4D(1, height, width, ofm_block.depth) -def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D): +def fit_block_for_ofm( + arch: ArchitectureFeatures, ofm_shape: Union[Shape4D, Block], kernel: Kernel, block: Union[Shape4D, Block] +): # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes) This is a specific # interpretation of a more general constraint that can't be applied because the # find_block_config function must return block configs that can be applied to any OFM shape. @@ -227,14 +233,14 @@ def find_block_config( npu_op_type: NpuBlockType, ofm_shape: Shape4D, ifm_shape: Shape4D, - ifm2_shape: Shape4D, + ifm2_shape: Optional[Shape4D], uses_scalar: bool, ifm_bits: int, kernel: Kernel, lut_banks: int, scaled: bool, ifm_resampling: resampling_mode, -) -> ArchitectureBlockConfig: +) -> Optional[ArchitectureBlockConfig]: SplitDepth = ArchitectureFeatures.OFMSplitDepth # Elementwise larger-volume correction if ifm2_shape is not None and ifm2_shape.elements() > ifm_shape.elements(): @@ -296,7 +302,7 @@ def find_block_config( depth = round_up(depth, SplitDepth) while depth <= search_space.depth: - wont_fit = {} + wont_fit: Dict[Tuple[int, int], bool] = {} for height in range(arch.ofm_ublock.height, search_space.height + 1, arch.ofm_ublock.height): for width in range(arch.ofm_ublock.width, search_space.width + 1, arch.ofm_ublock.width): # Avoid checking W/H transposed blocks that already didn't fit. i.e. if 8x4x16 didn't @@ -315,8 +321,8 @@ def find_block_config( layout = _try_block_config( arch.shram, ew_usage, - ofm_block, - ifm_block, + Block(ofm_block.width, ofm_block.height, ofm_block.depth), + Block(ifm_block.width, ifm_block.height, ifm_block.depth), ifm_bits, ifm_granule, acc_bits, @@ -385,9 +391,9 @@ def try_block_config( block_config: Block, arch: ArchitectureFeatures, npu_op_type: NpuBlockType, - ofm_shape: Block, - ifm_shape: Block, - ifm2_shape: Optional[Block], + ofm_shape: Union[Shape4D, Block], + ifm_shape: Union[Shape4D, Block], + ifm2_shape: Optional[Union[Shape4D, Block]], uses_scalar: bool, ifm_bits: int, is_partkernel: bool, diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py index 7e60221d..0009f6cf 100644 --- a/ethosu/vela/high_level_command_stream.py +++ b/ethosu/vela/high_level_command_stream.py @@ -16,6 +16,7 @@ # Description: # Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe). from typing import List +from typing import Optional import numpy as np @@ -41,8 +42,8 @@ class Box: npu_block_type: NpuBlockType, concat_offsets: List[int], k_dilated_height: int, - split_offset: Shape4D = None, - split_shape: Shape4D = None, + split_offset: Optional[Shape4D] = None, + split_shape: Optional[Shape4D] = None, upscaling_factor: int = 1, ): new_start_coord = list(self.start_coord) diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 6c403c86..f7c91aa2 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -17,9 +17,11 @@ # Description: # Conversion from high level command to NpuOperation from enum import IntEnum +from typing import cast from typing import Dict from typing import List from typing import Optional +from typing import Tuple from .api import NpuActivation from .api import NpuActivationOp @@ -66,6 +68,7 @@ from .tensor import Tensor from .tensor import TensorFormat from .tensor import TensorPurpose from .tensor import TensorSubPurpose +from .weight_compressor import NpuWeightTensor from .weight_compressor import WeightKey @@ -294,17 +297,17 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh def create_weights( - weight_tensor: Tensor, weight_box: Box, scale_tensor: Tensor, arch: ArchitectureFeatures -) -> List[NpuAddressRange]: + weight_tensor: NpuWeightTensor, weight_box: Box, scale_tensor: NpuWeightTensor, arch: ArchitectureFeatures +) -> Tuple[List[NpuAddressRange], List[NpuAddressRange]]: """Returns address ranges for weights and scales""" weights = [] biases = [] shared_region = get_region(weight_tensor.mem_type, arch) - scale_region = scale_tensor and get_region(scale_tensor.mem_type, arch) + scale_region = get_region(scale_tensor.mem_type, arch) if scale_tensor else 0 w_tensor_src = weight_tensor if weight_tensor.src_tensor: - w_tensor_src = weight_tensor.src_tensor + w_tensor_src = cast(NpuWeightTensor, weight_tensor.src_tensor) core_offset = 0 for core in range(0, arch.ncores): diff --git a/ethosu/vela/hillclimb_allocation.py b/ethosu/vela/hillclimb_allocation.py index 5e02dac0..2271fe9c 100644 --- a/ethosu/vela/hillclimb_allocation.py +++ b/ethosu/vela/hillclimb_allocation.py @@ -101,7 +101,7 @@ class HillClimbAllocator: LiveRangeInfo(id, lr.start_time, lr.end_time, lr.size, lr.get_alignment()) for id, lr in enumerate(live_ranges) ] - self.lrs_at_time = [] + self.lrs_at_time: List[List[LiveRangeInfo]] = [] # The available size (input to algorithm). self.available_size: int = 0 # The algorithm stops once the target size has been achieved @@ -227,8 +227,8 @@ class HillClimbAllocator: # - direct neighbours of the bottleneck live range # - direct and indirect predecessors of these neighbours + bottleneck # The turns at which these live ranges were allocated are put in the turns set. - turn_set = set() - turn_list = list() + turn_set: Set[int] = set() + turn_list: List[int] = list() self.add_predecessor_turns(turn_set, turn_list, max_lr) for lr2 in max_lr.neighbours: self.add_predecessor_turns(turn_set, turn_list, lr2) diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py index 8a2517de..671843f3 100644 --- a/ethosu/vela/nn_graph.py +++ b/ethosu/vela/nn_graph.py @@ -130,6 +130,7 @@ class CascadedPass: self.predecessors = [] self.successors = [] self.sram_used = 0 + self.time = 0 def __str__(self): return "" % ( @@ -537,6 +538,9 @@ class Graph: self.total_npu_weights = 0 self.total_npu_encoded_weights = 0 self.weight_cache = None # See CompressedWeightCache + self.bandwidths = 0 + self.macs = 0 + self.cycles = 0 def get_root_subgraph(self): return self.subgraphs[0] diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py index 34530ae8..8c4aee63 100644 --- a/ethosu/vela/npu_performance.py +++ b/ethosu/vela/npu_performance.py @@ -22,6 +22,7 @@ import copy from enum import auto from enum import IntEnum +from typing import Optional from typing import Set from uuid import UUID @@ -580,7 +581,7 @@ def update_summary_cycles(arch, bws, cycles): def estimate_full_op_performance( - arch, schedule: Schedule, op: SchedulerOperation, prev_op: SchedulerOperation, block_config + arch, schedule: Schedule, op: SchedulerOperation, prev_op: Optional[SchedulerOperation], block_config ): cycles_a = make_cycles_array() bws = make_bandwidth_array() diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py index 277f2de5..5a6423d8 100644 --- a/ethosu/vela/operation.py +++ b/ethosu/vela/operation.py @@ -15,6 +15,9 @@ # limitations under the License. # Description: # Internal representation of a Neural Network Operation. +# For Class name forward references for the type annotations. (see PEP 563). +from __future__ import annotations + import copy from collections import namedtuple from enum import Enum @@ -24,13 +27,14 @@ from typing import List from typing import Optional from typing import Tuple from typing import TYPE_CHECKING +from typing import Union from .api import NpuRoundingMode from .errors import VelaError from .numeric_util import full_shape from .shape4d import Shape4D - +# Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import. if TYPE_CHECKING: from .tensor import Tensor @@ -80,9 +84,6 @@ class Kernel: def area_height(self) -> int: return (self.height - 1) * self.dilation.y + 1 - def dilation(self) -> PointXY: - return self.dilation - def dilated_wh(self) -> Tuple[int, int]: """Returns the dilated kernel width/height""" return self.dilation.x * (self.width - 1) + 1, self.dilation.y * (self.height - 1) + 1 @@ -443,7 +444,7 @@ def create_activation_function(op_type: Op, min=None, max=None) -> ActivationFun return act -def get_slice_offsets(input_shape: List[int], offset_tens: int, offset_mask: int, is_begin: bool = True): +def get_slice_offsets(input_shape: List[int], offset_tens: Tensor, offset_mask: int, is_begin: bool = True): # For strided slice operator: get start or end offsets offsets = len(input_shape) * [0] if is_begin else input_shape[:] for idx in range(len(input_shape)): @@ -493,7 +494,7 @@ class Operation: self.type = op_type self.name = name self.attrs: Dict[str, Any] = {} - self.inputs: List[Tensor] = [] + self.inputs: List[Optional[Tensor]] = [] self.outputs: List[Tensor] = [] self.intermediates: List[Tensor] = [] self.flops = 0 @@ -514,9 +515,9 @@ class Operation: self.ofm_shapes: List[Shape4D] = [] # If not none: contains rescale to be used as output scaling # (which overrides the ofm tensor's scale) - self.rescale = None - self.read_offsets: List[Shape4D] = [None, None] # offset for [ifm, ifm2] - self.read_shapes: List[Shape4D] = [None, None] # read shape for [ifm, ifm2] + self.rescale: Optional[Union[Tuple[int, int], ExplicitScaling]] = None + self.read_offsets: List[Optional[Shape4D]] = [None, None] # offset for [ifm, ifm2] + self.read_shapes: List[Optional[Shape4D]] = [None, None] # read shape for [ifm, ifm2] self.rounding_mode: Optional[NpuRoundingMode] = None # Rescale op in TOSA supplies explicit multiplier and shift values self.explicit_scaling: Optional[ExplicitScaling] = None diff --git a/ethosu/vela/operation_util.py b/ethosu/vela/operation_util.py index 29caf6d0..36a8e592 100644 --- a/ethosu/vela/operation_util.py +++ b/ethosu/vela/operation_util.py @@ -234,7 +234,7 @@ def create_binary_elementwise( op_type: Op, name: str, ifm: Tensor, - ifm2: Tensor, + ifm2: Optional[Tensor], quantization: QuantizationParameters, activation: Optional[ActivationFunction] = None, dtype: Optional[DataType] = None, diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index fd32b655..3be2898c 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -21,6 +21,7 @@ import math from collections import defaultdict from enum import Enum from enum import IntEnum +from typing import cast from typing import Dict from typing import List from typing import Optional @@ -319,7 +320,7 @@ def generate_activation(emit: CommandStreamEmitter, activation: Optional[NpuActi quantized_min = max(-128, quantized_min) quantized_max = min(127, quantized_max) else: - activation_value = activation_op_map[act.op_type] + activation_value = cast(int, activation_op_map[act.op_type]) emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation_value) emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MIN, quantized_min) emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MAX, quantized_max) @@ -584,7 +585,7 @@ def get_arch_block_config( block_config, arch, block_type, - npu_op.ofm.shape, + shape3d_to_block(npu_op.ofm.shape), ifm_shape, ifm2_shape, uses_scalar, @@ -741,6 +742,8 @@ def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElem ofm_scale, shift = scaling.elementwise_mul_scale(input_scale, input2_scale, output_scale) emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift) else: # Add/Sub + opa_scale: float + opb_scale: float bitdepth = npu_op.ifm.data_type.size_in_bits() use_advanced_scaling = False if None in (input_scale, input2_scale, output_scale): @@ -799,7 +802,7 @@ def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElem # ------------------------------------------------------------------- -def print_feature_map(fm: NpuFeatureMap, name: str): +def print_feature_map(fm: Optional[NpuFeatureMap], name: str): if fm is not None: q = ( "no quantization" diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py index 3751d88e..83126ead 100644 --- a/ethosu/vela/register_command_stream_util.py +++ b/ethosu/vela/register_command_stream_util.py @@ -163,7 +163,7 @@ def get_h_ranges( return [get_address_range(fm, strides, y, x0, c0, y, x1, c1) for y in range(y0, y1 + 1)] -def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXYZ) -> List[NpuAddressRange]: +def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXYZ) -> List[Optional[NpuAddressRange]]: """ Returns a list of adddress ranges that covers the area start - end (inclusive). Divides the area in horizontal "stripes" of height 1, and returns the address ranges for these "stripes". @@ -183,7 +183,7 @@ def get_address_ranges_for_area(fm: NpuFeatureMap, start: PointXYZ, end: PointXY h, w, c = fm.shape y0, x0, c0 = start.y, start.x, start.z y1, x1, c1 = min(end.y, h - 1), min(end.x, w - 1), min(end.z, c - 1) - ranges = [] + ranges: List[Optional[NpuAddressRange]] = [] if x0 < width_0 and y0 < height_0: # Horizontal ranges for tile 0 ranges.extend(get_h_ranges(fm, strides, y0, x0, c0, min(y1, height_0 - 1), min(x1, width_0 - 1), c1)) @@ -373,7 +373,7 @@ def intersects( else: # The OFM produces a part of the IFM (e.g. a stripe), or the IFM consumes part of the OFM. # In this case, address comparison between the two areas is needed - ifm_ranges = get_address_ranges_for_area(ifm, ifm_start_coord, ifm_end_coord) + ifm_ranges: List[Optional[NpuAddressRange]] = get_address_ranges_for_area(ifm, ifm_start_coord, ifm_end_coord) prev_ofm_ranges = get_address_ranges_for_area(prev_ofm, ofm_start_coord, ofm_end_coord) res = range_lists_overlap(ifm_ranges, prev_ofm_ranges) return res diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 284848f5..73133bcd 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -17,6 +17,9 @@ # Description: # The scheduler creates and searches for an optimal plan for the network, selecting block configurations and # subdivisions for the Operators +# For Class name forward references for the type annotations. (see PEP 563). +from __future__ import annotations + import copy from collections import namedtuple from enum import auto @@ -25,6 +28,11 @@ from typing import Dict from typing import List from typing import Optional from typing import Tuple +from typing import TYPE_CHECKING + +# Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import. +if TYPE_CHECKING: + from .npu_performance import CycleCost import numpy as np @@ -57,6 +65,7 @@ from .tensor import Tensor from .tensor import TensorFormat from .tensor import TensorPurpose from .tensor import TensorSubPurpose +from .weight_compressor import NpuWeightTensor def shape_for_format(shape: Shape4D, tensor_format: TensorFormat) -> Shape4D: @@ -95,10 +104,10 @@ class SchedulerOpInfo: self.cascade = 0 # Assigned by CascadeBuilder. 0 means not part of a cascade self.time_index = None # Set by update_op_memory_snapshot self.ofm_depth_slices: List[int] = [0, stripe.depth] - self.npu_weights_tensor = None - self.npu_scales_tensor = None - self.buffered_weight_tensor = None - self.cycles = None + self.npu_weights_tensor: Optional[NpuWeightTensor] = None + self.npu_scales_tensor: Optional[NpuWeightTensor] = None + self.buffered_weight_tensor: Optional[Tensor] = None + self.cycles: Optional[CycleCost] = None self.slack_buffering_cycles = 0 self.slack_buffering_memory = 0 self.full_weight_transfer_cycles = 0 @@ -230,7 +239,7 @@ class SchedulerOperation: def create_scheduler_info(self, nng: Graph, stripe: Shape4D) -> SchedulerOpInfo: """Returns schedule info about this SchedulerOperation based on how many ofm elements it should produce""" ifm_shape = self.ifm.shape - ifm2_shape = self.ifm2 and self.ifm2.shape + ifm2_shape = self.ifm2.shape if self.ifm2 is not None else None ofm_shape = stripe if ofm_shape != self.ofm.shape: @@ -273,14 +282,14 @@ class SchedulerOperation: return get_ifm_area_required(ofm_shape_to_produce, self.kernel, self.resampling_mode) - def _calculate_min_stripe_input(self) -> Shape4D: + def _calculate_min_stripe_input(self) -> Tuple[int, int]: # Calculate the input volume required height and width for the smallest possible stripe (h,w = 1,1) min_stripe = self.ofm.shape.with_hw(1, 1) return self._get_stripe_input_requirement(min_stripe) def _get_block_config( self, ifm_shape: Shape4D, ifm2_shape: Optional[Shape4D], uses_scalar: bool, ofm_shape: Shape4D - ) -> ArchitectureBlockConfig: + ) -> Optional[ArchitectureBlockConfig]: # Returns a block config and SHRAM layout lut_banks = 2 if self.parent_op.activation_lut else 0 return find_block_config( @@ -325,7 +334,7 @@ class Schedule: self.cost_map: Dict[SchedulerOperation, SchedulerOpInfo] = {} self.cascades: Dict[int, CascadeInfo] = {} self.fast_storage_peak_usage = 0 - self.memory_snapshot = None + self.memory_snapshot: Optional[List[int]] = None @property def name(self): @@ -340,7 +349,7 @@ class Scheduler: self.sg = sg self.arch = arch self.sched_ops: List[SchedulerOperation] = [] - self.max_schedule = None + self.max_schedule: Optional[Schedule] = None self.scheduler_options = options def avoid_nhcwb16_for_ofm(self, tens, ps, arch): @@ -524,7 +533,7 @@ class Scheduler: def propose_operator_buffering( self, sched_op: SchedulerOperation, - prev_op: SchedulerOperation, + prev_op: Optional[SchedulerOperation], buffered_schedule: Schedule, ref_schedule: Schedule, staging_limit_bytes, @@ -605,7 +614,7 @@ class Scheduler: cost.npu_scales_tensor = full_scales return - encoded_weights = full_weights + encoded_weights: Optional[NpuWeightTensor] = full_weights encoded_scales = full_scales # How many NPU cycles are available under the previously executing @@ -681,7 +690,7 @@ class Scheduler: cost.block_config, cost.ofm_depth_slices, ) - + assert encoded_weights is not None # Chosen buffering might not fit at all, iterate until it does # or until the minimum usable slice size is reached if ( @@ -747,7 +756,7 @@ class Scheduler: cost_map = min_schedule.cost_map # Keep track of the previous Op - which consumes the current Op's OFM - prev_op = None + prev_op: Optional[SchedulerOperation] = None for sched_op in reversed(self.sched_ops): min_stripe_height = prev_op.kernel.stride.y if prev_op else 1 min_stripe = sched_op.ofm.shape.with_height(min_stripe_height) diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index 82de8973..19016a0f 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -420,7 +420,7 @@ class Tensor: self.ifm_write_protected = False # Reference to parent-tensor if this tensor is a clone - self.src_tensor = None + self.src_tensor: Optional[Tensor] = None @property def address(self) -> int: diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index c82140c5..c8b5129d 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -91,7 +91,7 @@ def verify_allocation(live_ranges: LiveRangeGraph, alignment: int): verify_alignment(live_ranges, alignment) nr_time_slots = 1 + max(lr.end_time for lr in live_ranges.lrs) # Contains active live ranges at each timestamp - lrs_at_time = [[] for i in range(nr_time_slots)] + lrs_at_time: List[List[LiveRange]] = [[] for i in range(nr_time_slots)] for lr in live_ranges.lrs: for t in range(lr.start_time, lr.end_time + 1): lrs_at_time[t].append(lr) diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index 68817035..22fe512e 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -17,6 +17,8 @@ # Compresses and pads the weigths. It also calculates the scales and packs with the biases. from collections import namedtuple from collections import OrderedDict +from typing import Dict +from typing import Optional from typing import Tuple import numpy as np @@ -75,7 +77,7 @@ class NpuWeightTensor(Tensor): class CompressedWeightCache: """Global tensor weight compression cache""" - cache = {} + cache: Dict[WeightCompressionConfig, Tensor] = {} @staticmethod def get_tensor_with_same_compression(wcc): @@ -279,7 +281,7 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling): def encode_weight_and_scale_tensor( arch, op, weight_tens, scale_tens, kernel, block_config, depth_offsets, rescale_for_faf=False -) -> (NpuWeightTensor, NpuWeightTensor): +) -> Tuple[Optional[NpuWeightTensor], Optional[NpuWeightTensor]]: npu_block_type = op.type.npu_block_type ifm_scale = scale_tens and scale_tens.consumer_list[0].get_input_quantization().scale_f32 diff --git a/setup.py b/setup.py index 488d2966..031401e7 100644 --- a/setup.py +++ b/setup.py @@ -98,6 +98,6 @@ setup( ], entry_points={"console_scripts": ["vela = ethosu.vela.vela:main"]}, ext_modules=[mlw_module], - cmdclass={"build_ext": BuildExtension}, + cmdclass={"build_ext": BuildExtension}, # type: ignore[dict-item] setup_requires=["numpy>=1.16.6,<=1.19.5", "setuptools_scm"], ) -- cgit v1.2.1