aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/npu_performance.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/npu_performance.py')
-rw-r--r--ethosu/vela/npu_performance.py15
1 files changed, 7 insertions, 8 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 84cc4931..11f1e92b 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -23,12 +23,13 @@
# estimate.
import enum
-from . import numeric_util
+
import numpy as np
-from .tensor import TensorPurpose, MemArea, TensorFormat, shape_num_elements, Tensor, TensorBlockTraversal
-from .operation import Operation
-from .data_type import DataType, BaseType
-from .nn_graph import PassPlacement, NpuBlockType, SchedulerRewrite, Pass
+
+from . import numeric_util
+from .tensor import TensorPurpose, MemArea, shape_num_elements, TensorBlockTraversal
+from .nn_graph import PassPlacement, SchedulerRewrite
+from .operation import NpuBlockType
from .architecture_features import Block, Kernel
@@ -357,9 +358,7 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
n_kernel_xy, 4
) # need at least 4, as this is the minimum duty cycle for secondary accumulator writes
if weight_tensor is not None:
- n_kernel_xy = numeric_util.round_up(
- n_kernel_xy, 4
- ) # weights need to be read in blocks of 4
+ n_kernel_xy = numeric_util.round_up(n_kernel_xy, 4) # weights need to be read in blocks of 4
num_mac_ops = 0
for n_blocks_for_size, block_size in block_setup: