author     Diego Russo <diego.russo@arm.com>   2020-04-14 18:41:58 +0100
committer  Tim Hall <tim.hall@arm.com>         2020-06-18 17:53:52 +0100
commit     ea6111a36e55501bbfb9ea022aaf8305b4d80183 (patch)
tree       f6787f7d07ea56a502616a8d6f36bc9c7e6e4efe
parent     2213e90570af328418d4f4a0d54269ed21dc40bc (diff)
download   ethos-u-vela-ea6111a36e55501bbfb9ea022aaf8305b4d80183.tar.gz
Add pre-commit support for sanity checks
Use the pre-commit framework [2] to run black and flake8 before the commit. black and flake8 are managed by the pre-commit framework and can be run manually by the user with the `pre-commit run` command.

Fix the code base with the help of black and flake8.
Fix import statements according to PEP8 guidelines [1].

Both tools have the following settings (specified in the pre-commit configuration file):
* line length: 120 characters
* directories to exclude: ethosu/vela/tflite/ and ethosu/vela/ethos_u55_regs

Updated README.md on how to install pre-commit and how to run the sanity checks. Pipenv files have been updated, including the new dependencies for pre-commit.

[1]: https://www.python.org/dev/peps/pep-0008/#imports
[2]: https://github.com/pre-commit/pre-commit

Change-Id: I304d9fffdf019d390ffa396a529c8a7c2437f63d
Signed-off-by: Diego Russo <diego.russo@arm.com>
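For reference, the manual workflow described above might look like the following sketch (hook ids are taken from the .pre-commit-config.yaml added by this commit; commands are assumed to be run from the repository root inside the pipenv environment):

```
# Install the dev dependencies (including pre-commit) and register the git hook
pipenv install -e . --dev
pre-commit install

# Run a single hook, or all hooks, against the whole tree
pre-commit run flake8 --all-files
pre-commit run black --all-files
pre-commit run --all-files
```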
-rw-r--r--  .pre-commit-config.yaml                              14
-rw-r--r--  Pipfile                                               2
-rw-r--r--  Pipfile.lock                                        151
-rw-r--r--  README.md                                            42
-rw-r--r--  ethosu/vela/_version.py                               2
-rw-r--r--  ethosu/vela/architecture_features.py                 22
-rw-r--r--  ethosu/vela/compiler_driver.py                        7
-rw-r--r--  ethosu/vela/data_type.py                              3
-rw-r--r--  ethosu/vela/driver_actions.py                        18
-rw-r--r--  ethosu/vela/extract_npu_subgraphs.py                  5
-rw-r--r--  ethosu/vela/graph_optimiser.py                       29
-rw-r--r--  ethosu/vela/high_level_command_stream.py              8
-rw-r--r--  ethosu/vela/high_level_command_stream_generator.py    3
-rw-r--r--  ethosu/vela/insert_dma.py                             5
-rw-r--r--  ethosu/vela/live_range.py                            12
-rw-r--r--  ethosu/vela/mark_tensors.py                           5
-rw-r--r--  ethosu/vela/nn_graph.py                               3
-rw-r--r--  ethosu/vela/npu_performance.py                       15
-rw-r--r--  ethosu/vela/npu_serialisation.py                     12
-rw-r--r--  ethosu/vela/numeric_util.py                           1
-rw-r--r--  ethosu/vela/pass_packing.py                          29
-rw-r--r--  ethosu/vela/range_set.py                              1
-rw-r--r--  ethosu/vela/register_command_stream_generator.py     27
-rw-r--r--  ethosu/vela/scaling.py                                3
-rw-r--r--  ethosu/vela/scheduler.py                             30
-rw-r--r--  ethosu/vela/shared_buffer_allocation.py               5
-rw-r--r--  ethosu/vela/stats_writer.py                          11
-rw-r--r--  ethosu/vela/tensor.py                                 7
-rw-r--r--  ethosu/vela/tensor_allocation.py                      9
-rw-r--r--  ethosu/vela/tflite_mapping.py                        17
-rw-r--r--  ethosu/vela/tflite_reader.py                         28
-rw-r--r--  ethosu/vela/tflite_writer.py                         18
-rw-r--r--  ethosu/vela/vela.py                                   8
-rw-r--r--  ethosu/vela/weight_compressor.py                     13
34 files changed, 376 insertions, 189 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..0695effb
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,14 @@
+exclude: '^ethosu/vela/(tflite|ethos_u55_regs)/'
+repos:
+- repo: https://github.com/ambv/black
+ rev: stable
+ hooks:
+ - id: black
+ language_version: python3.6
+ args: [--line-length=120]
+
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.7.9
+ hooks:
+ - id: flake8
+ args: [--max-line-length=120, --extend-ignore=E203]
diff --git a/Pipfile b/Pipfile
index 300bef65..33f941f5 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,6 +4,8 @@ url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
+pre-commit = "*"
+ethos-u-vela = {editable = true,path = "."}
[packages]
ethos-u-vela = {editable = true,path = "."}
diff --git a/Pipfile.lock b/Pipfile.lock
index 6fa01549..c745f93f 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "2d930644f3f81f11dae3317cae890fe083479342c80da44161b46ac83d6972d5"
+ "sha256": "84a8b4a6f5aa912c80e0cadffe26bc8575705edc0730d833db4f0984789ac288"
},
"pipfile-spec": 6,
"requires": {},
@@ -52,5 +52,152 @@
"version": "==1.18.2"
}
},
- "develop": {}
+ "develop": {
+ "appdirs": {
+ "hashes": [
+ "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
+ "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
+ ],
+ "version": "==1.4.3"
+ },
+ "cfgv": {
+ "hashes": [
+ "sha256:1ccf53320421aeeb915275a196e23b3b8ae87dea8ac6698b1638001d4a486d53",
+ "sha256:c8e8f552ffcc6194f4e18dd4f68d9aef0c0d58ae7e7be8c82bee3c5e9edfa513"
+ ],
+ "version": "==3.1.0"
+ },
+ "distlib": {
+ "hashes": [
+ "sha256:2e166e231a26b36d6dfe35a48c4464346620f8645ed0ace01ee31822b288de21"
+ ],
+ "version": "==0.3.0"
+ },
+ "ethos-u-vela": {
+ "editable": true,
+ "path": "."
+ },
+ "filelock": {
+ "hashes": [
+ "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
+ "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
+ ],
+ "version": "==3.0.12"
+ },
+ "flatbuffers": {
+ "hashes": [
+ "sha256:776a959c5f70b41819fa75de44ed14fd984fa1a79b378f27e6f4fff338cbdca2",
+ "sha256:f24185db54193540e3d684dc98aa7c2d89882341641548ceb36fd2589fef6c4e"
+ ],
+ "version": "==1.11.0"
+ },
+ "identify": {
+ "hashes": [
+ "sha256:2bb8760d97d8df4408f4e805883dad26a2d076f04be92a10a3e43f09c6060742",
+ "sha256:faffea0fd8ec86bb146ac538ac350ed0c73908326426d387eded0bcc9d077522"
+ ],
+ "version": "==1.4.14"
+ },
+ "importlib-metadata": {
+ "hashes": [
+ "sha256:2a688cbaa90e0cc587f1df48bdc97a6eadccdcd9c35fb3f976a09e3b5016d90f",
+ "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e"
+ ],
+ "markers": "python_version < '3.8'",
+ "version": "==1.6.0"
+ },
+ "importlib-resources": {
+ "hashes": [
+ "sha256:4019b6a9082d8ada9def02bece4a76b131518866790d58fdda0b5f8c603b36c2",
+ "sha256:dd98ceeef3f5ad2ef4cc287b8586da4ebad15877f351e9688987ad663a0a29b8"
+ ],
+ "markers": "python_version < '3.7'",
+ "version": "==1.4.0"
+ },
+ "nodeenv": {
+ "hashes": [
+ "sha256:5b2438f2e42af54ca968dd1b374d14a1194848955187b0e5e4be1f73813a5212"
+ ],
+ "version": "==1.3.5"
+ },
+ "numpy": {
+ "hashes": [
+ "sha256:1598a6de323508cfeed6b7cd6c4efb43324f4692e20d1f76e1feec7f59013448",
+ "sha256:1b0ece94018ae21163d1f651b527156e1f03943b986188dd81bc7e066eae9d1c",
+ "sha256:2e40be731ad618cb4974d5ba60d373cdf4f1b8dcbf1dcf4d9dff5e212baf69c5",
+ "sha256:4ba59db1fcc27ea31368af524dcf874d9277f21fd2e1f7f1e2e0c75ee61419ed",
+ "sha256:59ca9c6592da581a03d42cc4e270732552243dc45e87248aa8d636d53812f6a5",
+ "sha256:5e0feb76849ca3e83dd396254e47c7dba65b3fa9ed3df67c2556293ae3e16de3",
+ "sha256:6d205249a0293e62bbb3898c4c2e1ff8a22f98375a34775a259a0523111a8f6c",
+ "sha256:6fcc5a3990e269f86d388f165a089259893851437b904f422d301cdce4ff25c8",
+ "sha256:82847f2765835c8e5308f136bc34018d09b49037ec23ecc42b246424c767056b",
+ "sha256:87902e5c03355335fc5992a74ba0247a70d937f326d852fc613b7f53516c0963",
+ "sha256:9ab21d1cb156a620d3999dd92f7d1c86824c622873841d6b080ca5495fa10fef",
+ "sha256:a1baa1dc8ecd88fb2d2a651671a84b9938461e8a8eed13e2f0a812a94084d1fa",
+ "sha256:a244f7af80dacf21054386539699ce29bcc64796ed9850c99a34b41305630286",
+ "sha256:a35af656a7ba1d3decdd4fae5322b87277de8ac98b7d9da657d9e212ece76a61",
+ "sha256:b1fe1a6f3a6f355f6c29789b5927f8bd4f134a4bd9a781099a7c4f66af8850f5",
+ "sha256:b5ad0adb51b2dee7d0ee75a69e9871e2ddfb061c73ea8bc439376298141f77f5",
+ "sha256:ba3c7a2814ec8a176bb71f91478293d633c08582119e713a0c5351c0f77698da",
+ "sha256:cd77d58fb2acf57c1d1ee2835567cd70e6f1835e32090538f17f8a3a99e5e34b",
+ "sha256:cdb3a70285e8220875e4d2bc394e49b4988bdb1298ffa4e0bd81b2f613be397c",
+ "sha256:deb529c40c3f1e38d53d5ae6cd077c21f1d49e13afc7936f7f868455e16b64a0",
+ "sha256:e7894793e6e8540dbeac77c87b489e331947813511108ae097f1715c018b8f3d"
+ ],
+ "version": "==1.18.2"
+ },
+ "pre-commit": {
+ "hashes": [
+ "sha256:487c675916e6f99d355ec5595ad77b325689d423ef4839db1ed2f02f639c9522",
+ "sha256:c0aa11bce04a7b46c5544723aedf4e81a4d5f64ad1205a30a9ea12d5e81969e1"
+ ],
+ "index": "pypi",
+ "version": "==2.2.0"
+ },
+ "pyyaml": {
+ "hashes": [
+ "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
+ "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
+ "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
+ "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
+ "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
+ "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
+ "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
+ "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
+ "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
+ "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
+ "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
+ ],
+ "version": "==5.3.1"
+ },
+ "six": {
+ "hashes": [
+ "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
+ "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
+ ],
+ "version": "==1.14.0"
+ },
+ "toml": {
+ "hashes": [
+ "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
+ "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
+ ],
+ "version": "==0.10.0"
+ },
+ "virtualenv": {
+ "hashes": [
+ "sha256:00cfe8605fb97f5a59d52baab78e6070e72c12ca64f51151695407cc0eb8a431",
+ "sha256:c8364ec469084046c779c9a11ae6340094e8a0bf1d844330fc55c1cefe67c172"
+ ],
+ "version": "==20.0.17"
+ },
+ "zipp": {
+ "hashes": [
+ "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
+ "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
+ ],
+ "markers": "python_version < '3.8'",
+ "version": "==3.1.0"
+ }
+ }
}
diff --git a/README.md b/README.md
index 03ad7fec..4d7db1a5 100644
--- a/README.md
+++ b/README.md
@@ -110,3 +110,45 @@ Contributions are accepted under [Apache License 2.0](LICENSE.txt). Only submit
## License
Vela is licensed under [Apache License 2.0](LICENSE.txt)
+## Contributions and Pull Requests
+
+Contributions are accepted under Apache-2.0. Only submit contributions where you have authored all of the code.
+
+### Sanity checks
+
+The Python codebase is PEP8 compliant, with the exception of a 120 character line length.
+We run black and flake8 against the code base, excluding the "ethosu/vela/tflite/" and "ethosu/vela/ethos\_u55\_regs" directories because they are auto-generated by third party tools.
+These tools are run using the [pre-commit framework](https://pre-commit.com/). The configuration file is .pre-commit-config.yaml.
+
+#### Install tools
+
+To install pre-commit, run the following:
+
+```
+pipenv install -e . --dev
+```
+
+After the installation, pre-commit is available in the virtual environment.
+
+#### Install the pre-commit hook
+
+To ease development, these sanity checks can be run automatically before committing the code.
+To install the git hook, run:
+
+```
+$ pre-commit install
+pre-commit installed at .git/hooks/pre-commit
+```
+
+The checks will be run before the commit: if one of them fails, you need to fix the code to make the checks pass.
+
+#### Run the sanity checks
+
+The checks can also be run manually. This can be achieved by running the following:
+```
+$ pre-commit run flake8 --all-files
+...
+$ pre-commit run black --all-files
+```
+
+If you don't specify a hook id after `run`, all the checks will be executed.
diff --git a/ethosu/vela/_version.py b/ethosu/vela/_version.py
index f3888c31..b670819d 100644
--- a/ethosu/vela/_version.py
+++ b/ethosu/vela/_version.py
@@ -16,4 +16,4 @@
import pkg_resources
-__version__ = pkg_resources.get_distribution("ethos-u-vela").version
\ No newline at end of file
+__version__ = pkg_resources.get_distribution("ethos-u-vela").version
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 51c632e0..69f95fa2 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -18,13 +18,17 @@
# Description:
# Holds a container for Ethos-U55/System architecture parameters.
-from .nn_graph import MemArea, TensorPurpose, NpuBlockType, TensorFormat
-from .numeric_util import round_up, round_up_divide
+import enum
from collections import namedtuple
from configparser import ConfigParser
-from .supported_operators import SupportedOperators
+
import numpy as np
-import enum
+
+from .tensor import MemArea, TensorPurpose, TensorFormat
+from .operation import NpuBlockType
+from .numeric_util import round_up, round_up_divide
+from .supported_operators import SupportedOperators
+
PointXY = namedtuple("PointXY", "x y")
PointXYZ = namedtuple("PointXYZ", "x y z")
@@ -151,7 +155,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
accelerator_config = accelerator_config.lower()
self.vela_config = vela_config
self.accelerator_config = accelerator_config
- if not self.accelerator_config in ArchitectureFeatures.accelerator_configs:
+ if self.accelerator_config not in ArchitectureFeatures.accelerator_configs:
raise Exception("Unknown accelerator configuration " + self.accelerator_config)
accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
self.config = accel_config
@@ -450,7 +454,6 @@ Note the difference between ArchitectureFeatures and CompilerOptions
)
# Calculate how many IFM blocks this OFM block requires (i.e how many jobs)
- ifm_block = self.get_ifm_block_size(ifm_block_depth, ofm_block, kernel, self.ofm_block_max)
ifm_depth_blocks = round_up_divide(ifm.size().depth, ifm_block_depth)
ifm_depth_blocks = 1 # Overwrite with 1 to force OFM block dependency, not IFM
@@ -476,7 +479,6 @@ Note the difference between ArchitectureFeatures and CompilerOptions
# Iterate over the next BLOCKDEP inputs, checking to see if a sliding window
# of IFM area overlaps with any previous OFM block generation.
elapsed_jobs = 0
- ifm_depth = ifm.size().depth
for forward_offset in range(ArchitectureFeatures.MAX_BLOCKDEP):
# This is the IFM block we want to sample from
in_area = self.get_first_job_input_volume(
@@ -533,7 +535,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
n_elements = op.inputs[0].elements()
cycles = intercept + n_elements * slope
return cycles
- except:
+ except Exception:
print("Error: Reading CPU cycle estimate in vela configuration file, section {}".format(section))
raise
@@ -554,7 +556,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
print("Warning: Using default values for system configuration")
else:
section_key = "SysConfig." + self.system_config
- if not section_key in self.vela_config:
+ if section_key not in self.vela_config:
raise Exception("Unknown system configuration " + self.system_config)
try:
@@ -585,7 +587,7 @@ Note the difference between ArchitectureFeatures and CompilerOptions
+ " (must be 'OnChipFlash' or 'OffChipFlash'). To store the weights and other constant data in SRAM"
" select 'OnChipFlash'"
)
- except:
+ except Exception:
print("Error: Reading System Configuration in vela configuration file, section {}".format(section_key))
raise
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index db669ac7..6fc3b653 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -18,6 +18,8 @@
# Description:
# Contains the main sequencing of the compiler.
+import time
+
from . import graph_optimiser
from . import mark_tensors
from . import insert_dma
@@ -25,9 +27,6 @@ from . import pass_packing
from . import scheduler
from . import tensor_allocation
from . import npu_performance
-import time
-
-from . import high_level_command_stream
from . import high_level_command_stream_generator
from . import register_command_stream_generator
from . import extract_npu_subgraphs
@@ -36,7 +35,7 @@ from . import weight_compressor
from . import live_range
from .tensor import MemArea
from .nn_graph import TensorAllocator, PassPlacement
-from .rewrite_graph import verify_graph_health, verify_subgraph_health
+from .rewrite_graph import verify_graph_health
class CompilerOptions:
diff --git a/ethosu/vela/data_type.py b/ethosu/vela/data_type.py
index 1d3e94ed..6dfe2167 100644
--- a/ethosu/vela/data_type.py
+++ b/ethosu/vela/data_type.py
@@ -18,9 +18,10 @@
# Description:
# Defines the basic numeric type classes for tensors.
-from .numeric_util import round_up_divide
import enum
+from .numeric_util import round_up_divide
+
class BaseType(enum.Flag):
Signed = 1
diff --git a/ethosu/vela/driver_actions.py b/ethosu/vela/driver_actions.py
index 86c4a369..bd15af20 100644
--- a/ethosu/vela/driver_actions.py
+++ b/ethosu/vela/driver_actions.py
@@ -18,9 +18,11 @@
# Description:
# Creates driver actions that are embedded in the custom operator payload.
-import numpy as np
from typing import List
-from .ethos_u55_regs.ethos_u55_regs import *
+
+import numpy as np
+
+from .ethos_u55_regs.ethos_u55_regs import config_r, id_r, ARCH_VER
class DACommands:
@@ -43,8 +45,8 @@ def make_da_tag(id: int, reserved: int, param: int) -> int:
def emit_fourcc(data: List[int], fourcc: str):
- assert data != None
- assert fourcc != None
+ assert data is not None
+ assert fourcc is not None
assert len(fourcc) == 4
value: int = 0
value = fourcc[0].encode()[0]
@@ -75,14 +77,14 @@ def build_config_word(arch):
def emit_config(data: List[int], rel: int, patch: int, arch):
- assert data != None
+ assert data is not None
data.append(make_da_tag(DACommands.Config, 0, (patch << DACommands.Config_PatchShift) | rel))
data.append(build_config_word(arch))
data.append(build_id_word())
def emit_cmd_stream_header(data: List[int], length: int):
- assert data != None
+ assert data is not None
# Insert NOPs to align start of command stream to 16 bytes
num_nops = 4 - ((len(data) + 1) % 4)
for _ in range(num_nops):
@@ -95,7 +97,7 @@ def emit_cmd_stream_header(data: List[int], length: int):
def emit_reg_read(data: List[int], reg_index: int, reg_count: int = 1):
- assert data != None
+ assert data is not None
assert reg_index >= 0
assert reg_count >= 1
payload: int = (reg_index & DACommands.ReadAPB_IndexMask) | ((reg_count << DACommands.ReadAPB_CountShift) - 1)
@@ -103,5 +105,5 @@ def emit_reg_read(data: List[int], reg_index: int, reg_count: int = 1):
def emit_dump_shram(data: List[int]):
- assert data != None
+ assert data is not None
data.append(make_da_tag(DACommands.DumpSHRAM, 0, 0))
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index 5b9ba8b0..ab3db21f 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -23,10 +23,11 @@
# by NpuOp operations. Later, Vela generates command streams and compressed weight streams for the NPU subgraphs and
# attaches them to the NpuOp. This encapsulates everything the NPU subgraph is supposed to do.
-from .nn_graph import Pass, PassPlacement, NpuBlockType, Subgraph
-from .operation import Operation
import numpy as np
+from .nn_graph import Pass, PassPlacement, Subgraph
+from .operation import Operation, NpuBlockType
+
def make_npu_call_op_pass(npu_subgraph):
op = Operation("NpuOp", "call_" + npu_subgraph.name)
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index a4ed39ff..b29a3823 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -19,12 +19,15 @@
# Early optimisation of the network graph, using the rewrite_graph module to do the traversal of the graph. These are
# split into two parts optimise_graph_a and optimise_graph_b.
-from .nn_graph import Operation, NpuBlockType, Tensor
-from . import rewrite_graph
-from .data_type import BaseType, DataType
-import numpy as np
import math
-from .numeric_util import round_up_divide
+
+import numpy as np
+
+from . import rewrite_graph
+from .operation import Operation, NpuBlockType
+from .tensor import Tensor
+from .data_type import DataType
+
passthrough_nodes = set(("Identity",))
@@ -83,7 +86,7 @@ def rewrite_split(tens, arch):
# For Split the offset cannot be extracted from the tensor so it has to
# be calculated from the index of the output tensor
- if axis != None:
+ if axis is not None:
# Get the start and end of the split
offset_start = [0] * len(tens.shape)
offset_end = [0] * len(tens.shape)
@@ -316,6 +319,7 @@ elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum", "Leaky
activation_ops = set(("Relu", "Relu6", "ReluN1To1", "Sigmoid", "Tanh"))
memory_only_ops = set(("Reshape",))
+
# Check if the op can be reordered
def get_prepend_op(op):
inp = op.inputs[0]
@@ -326,7 +330,7 @@ def get_prepend_op(op):
prep_op = prev_op
inp = prev_op.inputs[0]
prev_op = inp.ops[-1]
- if prev_op != None and len(prev_op.outputs) == 1 and len(prev_op.outputs[0].consumers()) == 1:
+ if prev_op is not None and len(prev_op.outputs) == 1 and len(prev_op.outputs[0].consumers()) == 1:
return prep_op
return None
@@ -384,7 +388,7 @@ def convert_depthwise_to_conv(op, arch):
def fixup_act_reorder(op, arch):
if op.type in activation_ops:
prep_op = get_prepend_op(op)
- if prep_op != None:
+ if prep_op is not None:
act_op = op.clone("_reordered")
act_op.inputs = [prep_op.inputs[0]]
act_op_out = act_op.inputs[0].clone("_acted")
@@ -400,7 +404,7 @@ def fixup_act_reorder(op, arch):
def convert_mul_max_to_abs_or_lrelu(op, arch):
- """Whenever there is a subgraph with this topology:
+ r"""Whenever there is a subgraph with this topology:
Input X For X = -1 or X > 0
| \ / This subgraph can be replaced with either
@@ -487,24 +491,25 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
for idx, sg in enumerate(nng.subgraphs):
# rewrite graph pass
nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
- sg, arch, [fixup_unpack_output,], op_rewrite_list, rewrite_unsupported=False
+ sg, arch, [fixup_unpack_output], op_rewrite_list, rewrite_unsupported=False
)
for idx, sg in enumerate(nng.subgraphs):
# remove passthrough tensors
- nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [remove_passthrough_tensor,], [])
+ nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [remove_passthrough_tensor], [])
if verbose_graph:
nng.print_graph()
return nng
+
def optimise_graph_b(nng, arch, verbose_graph=False):
if verbose_graph:
nng.print_graph()
for idx, sg in enumerate(nng.subgraphs):
# combined rewrite graph pass
- nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split,], [])
+ nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split], [])
if verbose_graph:
nng.print_graph()
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 952e2033..bdb04904 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -18,8 +18,10 @@
# Description:
# Contains classes that hold commands for the high-level command stream (one command per DMA or NPU stripe).
-from enum import Enum, IntEnum
+from enum import IntEnum
+
import numpy as np
+
from .operation import NpuBlockType
from .numeric_util import round_up_divide
from .range_set import MemoryAccessSet, AccessDirection
@@ -42,12 +44,12 @@ class Box:
new_start_coord[concat_axis] -= concat_offset
new_end_coord[concat_axis] -= concat_offset
- if split_offset != None:
+ if split_offset is not None:
for idx in range(len(split_offset)):
new_start_coord[idx] += split_offset[idx]
new_end_coord[idx] += split_offset[idx]
- if split_offset == None and npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct)):
+ if split_offset is None and npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct)):
# these types of operations do a "dot product" over the entire IFM
new_start_coord[-1] = 0
new_end_coord[-1] = ifm_shape[-1]
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 364df6f8..47392c0b 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -22,9 +22,8 @@
# calc_allowed_ofm_ifm_overlap_for_cascaded_pass().
from .nn_graph import SchedulingStrategy, PassPlacement
-import numpy as np
from .operation import NpuBlockType
-from .high_level_command_stream import Box, CommandType, Command, NpuStripe, DMA
+from .high_level_command_stream import Box, NpuStripe, DMA
def need_dma(tens):
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index b63c1ea1..33f1a02c 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -18,13 +18,14 @@
# Description:
# Insert DMA operations into the graph for transfering weights.
-from .nn_graph import Operation, MemArea, TensorPurpose, NpuBlockType
from . import rewrite_graph
+from .tensor import MemArea, TensorPurpose
+from .operation import Operation, NpuBlockType
def insert_dma_cmd(op, arch):
if op.type == "DMA":
- return op # Already rewritten
+ return op # Already rewritten
for idx, tens in enumerate(op.inputs):
if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and tens.mem_area != arch.fast_storage_mem_area:
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 24f1f64c..54c15ba9 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -20,7 +20,7 @@
# Can work with either a pass packed subgraph or a scheduled subgraph.
from .tensor import Tensor, MemArea
-from .nn_graph import TensorPurpose, PassPlacement
+from .nn_graph import PassPlacement
from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_cascaded_pass
@@ -90,9 +90,9 @@ class LiveRange:
if tens.address == 0:
tens.address = address
# Also need to set the address to the tensor's cpu/npu clones
- if tens.cpu_tensor != None:
+ if tens.cpu_tensor is not None:
tens.cpu_tensor.address = address
- if tens.npu_tensor != None:
+ if tens.npu_tensor is not None:
tens.npu_tensor.address = address
def get_alignment(self):
@@ -115,8 +115,8 @@ def merge_memory_op_ranges(sg, lr_graph, tensor_should_be_ignored, target_mem_ar
output_tensor = ps.outputs[0]
# If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
# or output, fuse the live-range with the Cpu tensors' live-range instead.
- input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor != None else input_tensor
- output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor != None else output_tensor
+ input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
+ output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
if not tensor_should_be_ignored(input_tensor, target_mem_area) and not tensor_should_be_ignored(
output_tensor, target_mem_area
):
@@ -221,7 +221,7 @@ def extract_live_ranges_from_cascaded_passes(
ignore_subgraph_input_output_tensors=False,
lr_graph=None,
):
- if lr_graph == None:
+ if lr_graph is None:
lr_graph = LiveRangeGraph()
if sg in lr_graph.processed_subgraphs:
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
index 9b1824b5..c42a28df 100644
--- a/ethosu/vela/mark_tensors.py
+++ b/ethosu/vela/mark_tensors.py
@@ -21,7 +21,7 @@
from . import rewrite_graph
from . import weight_compressor
from .architecture_features import Block
-from .nn_graph import TensorPurpose, TensorFormat, PassPlacement
+from .tensor import TensorPurpose, TensorFormat
from .operation import NpuBlockType
@@ -55,6 +55,7 @@ def inputs_from_output(op, idx):
print("Warning: Propagating unknown tensor purpose", op)
return res
+
tensor_purposes = [ # ops, input_purpose
(
set(
@@ -327,7 +328,7 @@ def mark_tensor_format(nng, arch, verbose_tensor_format=False):
return NpuBlockType.Default
def visit_tens(tens, ps):
- if not tens in formats_for_tensor:
+ if tens not in formats_for_tensor:
fmt = init_tens(tens)
else:
fmt = formats_for_tensor[tens]
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 8d335bd8..e7820fe6 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -24,9 +24,6 @@
# Graph - A full neural network graph with one or more Subgraphs.
import enum
-from .data_type import BaseType, DataType
-from .tensor import MemArea, TensorPurpose, TensorSubPurpose, TensorFormat, Tensor
-from .operation import Operation, NpuBlockType
class PassPlacement(enum.Enum):
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 84cc4931..11f1e92b 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -23,12 +23,13 @@
# estimate.
import enum
-from . import numeric_util
+
import numpy as np
-from .tensor import TensorPurpose, MemArea, TensorFormat, shape_num_elements, Tensor, TensorBlockTraversal
-from .operation import Operation
-from .data_type import DataType, BaseType
-from .nn_graph import PassPlacement, NpuBlockType, SchedulerRewrite, Pass
+
+from . import numeric_util
+from .tensor import TensorPurpose, MemArea, shape_num_elements, TensorBlockTraversal
+from .nn_graph import PassPlacement, SchedulerRewrite
+from .operation import NpuBlockType
from .architecture_features import Block, Kernel
@@ -357,9 +358,7 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
n_kernel_xy, 4
) # need at least 4, as this is the minimum duty cycle for secondary accumulator writes
if weight_tensor is not None:
- n_kernel_xy = numeric_util.round_up(
- n_kernel_xy, 4
- ) # weights need to be read in blocks of 4
+ n_kernel_xy = numeric_util.round_up(n_kernel_xy, 4) # weights need to be read in blocks of 4
num_mac_ops = 0
for n_blocks_for_size, block_size in block_setup:
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index 4542c25b..29ede842 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -18,13 +18,15 @@
# Description:
# Serialises and packs an NPU subgraph into tensors.
+import struct
+
+import numpy as np
+
+from . import driver_actions
from .nn_graph import PassPlacement
from .tensor import MemArea, Tensor, TensorPurpose, TensorFormat
from .operation import Operation
from .data_type import DataType
-import numpy as np
-from . import driver_actions
-import struct
def make_memory_tensor(name, mem_area, sz, want_values, arch):
@@ -75,7 +77,7 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens)
nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size
- if flash_tens == scratch_tens == None:
+ if flash_tens == scratch_tens is None:
# First Npu subgraph, create scratch and flash tensors
sg.scratch_tensor = make_memory_tensor(sg.name + "_scratch", scratch_area, scratch_size, False, arch)
sg.scratch_tensor.purpose = TensorPurpose.Scratch
@@ -88,7 +90,7 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens)
for cps in sg.cascaded_passes:
for ps in cps.passes:
- if ps.placement == PassPlacement.Npu and ps.weight_tensor != None:
+ if ps.placement == PassPlacement.Npu and ps.weight_tensor is not None:
# For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
# is pointing at the destination address of where the weights should be placed in SRAM.
# This ensures that the Flash weight tensor is used instead and thus gets the correct address.
diff --git a/ethosu/vela/numeric_util.py b/ethosu/vela/numeric_util.py
index e5bc88b8..4e61b4c5 100644
--- a/ethosu/vela/numeric_util.py
+++ b/ethosu/vela/numeric_util.py
@@ -19,6 +19,7 @@
# Numerical utilities for various types of rounding etc.
import math
+
import numpy as np
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 663520fc..bae81517 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -18,10 +18,12 @@
# Description:
# Packs a subgraph with Neural Network Operations into Passes. Each Pass has one or more Operations.
-from .nn_graph import Operation, Pass, PassPlacement, TensorPurpose, NpuBlockType, Tensor
-import collections
import enum
-from .data_type import BaseType, DataType
+import collections
+
+from .nn_graph import Pass, PassPlacement
+from .tensor import TensorPurpose
+from .operation import Operation, NpuBlockType
class PassFlags(enum.Flag):
@@ -104,10 +106,7 @@ elem_wise_ops = elem_wise_main_ops | activation_ops | set(("Sigmoid", "Tanh"))
quantization_ops = set(("Dequantize", "QuantizeV2", "Max", "Min"))
-cpu_ops = (
- set(("Softmax", "QuantizedSoftmax", "LRN", "Shape", "QuantizedPad", "Pad", "AddN"))
- | quantization_ops
-)
+cpu_ops = set(("Softmax", "QuantizedSoftmax", "LRN", "Shape", "QuantizedPad", "Pad", "AddN")) | quantization_ops
npu_dma_ops = set(("DMA",))
startup_init_ops = set(("Const", "VariableV2", "Placeholder", "SubgraphInput"))
@@ -183,7 +182,7 @@ test_sequence = [
# flags_to_set
PassFlags.Npu | PassFlags.Dma,
# flags_to_clear
- PassFlags.Empty
+ PassFlags.Empty,
),
(
# ops_set
@@ -203,7 +202,7 @@ test_sequence = [
# flags_to_set
PassFlags.MemoryOnly | PassFlags.Main,
# flags_to_clear
- PassFlags.Empty
+ PassFlags.Empty,
),
(
# ops_set
@@ -213,9 +212,9 @@ test_sequence = [
# flags_to_set
PassFlags.Cpu | PassFlags.Main,
# flags_to_clear
- PassFlags.Empty
+ PassFlags.Empty,
),
- ( # This last one is a fallback for unrecognised operations
+ ( # This last one is a fallback for unrecognised operations
# ops_set
None,
# incompatible_pack_flags
@@ -223,7 +222,7 @@ test_sequence = [
# flags_to_set
PassFlags.Cpu | PassFlags.Main,
# flags_to_clear
- PassFlags.Empty
+ PassFlags.Empty,
),
]
@@ -346,7 +345,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
is_element_wise = True
for op in reverse_ops_list:
- if not op.type in elem_wise_ops and not op.type in npu_dma_ops:
+ if op.type not in elem_wise_ops and op.type not in npu_dma_ops:
is_element_wise = False
break
@@ -368,9 +367,9 @@ def pack_into_passes(nng, arch, verbose_packing=False):
ops_list = list(reversed(reverse_ops_list))
intermediates = list(reversed(reverse_intermediates))
- if primary_op == None:
+ if primary_op is None:
primary_op = create_primary_op(ops_list)
- if primary_op != None:
+ if primary_op is not None:
visit_tensor_refcount[primary_op.inputs[0]] += 1
npu_block_type = primary_op.attrs["npu_block_type"]
for input_tens in primary_op.inputs:
diff --git a/ethosu/vela/range_set.py b/ethosu/vela/range_set.py
index 64de9709..d7623c5a 100644
--- a/ethosu/vela/range_set.py
+++ b/ethosu/vela/range_set.py
@@ -19,7 +19,6 @@
# Helper classes to track memory accesses for calculating dependencies between Commands.
from enum import IntEnum
-from collections import defaultdict
from functools import lru_cache
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 120cf8b1..460cf016 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -22,25 +22,19 @@
from collections import defaultdict
from enum import Enum, IntEnum
+
+import numpy as np
+
+from . import scaling
from .high_level_command_stream import CommandType
-from .ethos_u55_regs.ethos_u55_regs import *
-from .tensor import MemArea, TensorBlockTraversal
+from .ethos_u55_regs.ethos_u55_regs import cmd0, cmd1, acc_format, elementwise_mode, rounding, activation, ifm_precision
+from .tensor import MemArea, TensorBlockTraversal, TensorFormat
from .operation import NpuBlockType
from .numeric_util import quantise_float32, round_up, round_away_zero, round_up_to_int, clamp_sigmoid, clamp_tanh
from .data_type import BaseType, DataType
-import numpy as np
from .shared_buffer_allocation import SharedBufferAllocation
from .architecture_features import SharedBufferArea, SHRAMElements, ArchitectureFeatures
-from .nn_graph import TensorFormat, SchedulingStrategy
-from .range_set import (
- MemoryAccessSet,
- AccessDirection,
-)
-from .mark_tensors import (
- reshape_operations,
-)
from .architecture_features import Block, Kernel, Rect
-from . import scaling
class RegisterMachine:
@@ -372,7 +366,6 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
param = relative_dep[CommandType.DMA][0]
param = min(param, 0xF) # Clamp to allowable wait amount
emit.cmd_wait(cmd0.NPU_OP_DMA_WAIT, param, absolute_dep[CommandType.DMA][0])
- prev_cmd = None # Clear any dependency
for cmd in cmd_stream:
if cmd.cmdtype == CommandType.DMA:
@@ -684,7 +677,7 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
ifm_max = cmd.ifm_tensor.quantization.max
# Emit commands for any fused activation function
- if faf == None:
+ if faf is None:
emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation.NONE)
# Even if no activation function, values need to be set to override previous values
faf_min = ofm_quant_qmin
@@ -765,13 +758,13 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
),
):
- if tens == None:
+ if tens is None:
continue
- need_zero_point = (faf != None) or (fmf == "ConcatSliceWrite")
+ need_zero_point = (faf is not None) or (fmf == "ConcatSliceWrite")
if (
primary_op.type in set(("AvgPool", "AvgPoolAct")) and not need_zero_point
- ) or tens.quantization == None:
+ ) or tens.quantization is None:
# Actual integer operation, just set scale to 1 and zero point to 0
emit.cmd0_with_param(zero_point_op, 0)
else:
diff --git a/ethosu/vela/scaling.py b/ethosu/vela/scaling.py
index ce0259a5..3b749ddd 100644
--- a/ethosu/vela/scaling.py
+++ b/ethosu/vela/scaling.py
@@ -19,9 +19,10 @@
# Contains various scaling calculations for weights, elementwise operations, pooling etc.
import math
-from .numeric_util import round_away_zero
from enum import IntEnum
+from .numeric_util import round_away_zero
+
class OperandToScale(IntEnum):
OPa = 1
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index d51b5ac6..fe31a463 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -19,24 +19,17 @@
# The scheduler costs various strategies for scheduling the network in order to select the block configuration.
import enum
-from .nn_graph import (
- TensorPurpose,
- TensorSubPurpose,
- TensorFormat,
- MemArea,
- SchedulingStrategy,
- CascadedPass,
- PassPlacement,
- SchedulerRewrite,
- Operation,
- NpuBlockType,
-)
-from . import live_range
+import copy
+
import numpy as np
+
+from . import live_range
from . import npu_performance
from . import stats_writer
+from .tensor import TensorPurpose, TensorSubPurpose, TensorFormat, MemArea
+from .operation import NpuBlockType
+from .nn_graph import SchedulingStrategy, CascadedPass, PassPlacement, SchedulerRewrite
from .npu_performance import make_bandwidth_array, make_macs_array, make_cycles_array, make_metrics_arrays, PassCycles
-import time, copy
from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_pass_list
from .shared_buffer_allocation import (
find_block_configs_suitable_for_pass_and_shared_buffer,
@@ -279,7 +272,6 @@ class DynamicProgrammingScheduler:
if len(candidates) <= 1:
return candidates
assert remove_equally_good_candidates
- start = time.time()
pareto_vals = np.zeros((len(candidates), DynamicProgrammingScheduler.num_pareto_metrics))
ids = np.arange(len(candidates), dtype=np.int32)
for idx, cand in enumerate(candidates):
@@ -713,7 +705,7 @@ class DynamicProgrammingScheduler:
def get_block_configs(self, ps):
if ps.placement != PassPlacement.Npu:
- return [(1, 1, 1, 1)] # default
+ return [(1, 1, 1, 1)] # default
block_configs = find_block_configs_suitable_for_pass_and_shared_buffer(self.arch, ps)
@@ -764,9 +756,7 @@ class DynamicProgrammingScheduler:
for tens in ps.intermediates:
if tens.mem_area == self.mem_area:
if tens.purpose == TensorPurpose.Weights:
- sram_used += tens.storage_size_for_sub_purpose(
- TensorSubPurpose.DoubleBuffer, block_config[3]
- )
+ sram_used += tens.storage_size_for_sub_purpose(TensorSubPurpose.DoubleBuffer, block_config[3])
rewrite_list.append(
(
SchedulerRewrite.ChangeTensorSubPurpose,
@@ -884,7 +874,7 @@ class DynamicProgrammingScheduler:
% (len(self.sg.passes), len(pass_to_cascaded_pass))
)
for ps in self.sg.passes:
- if not ps in pass_to_cascaded_pass:
+ if ps not in pass_to_cascaded_pass:
print("%3d pass missing cascaded pass %s" % (ps.time, ps))
assert len(pass_to_cascaded_pass) == len(self.sg.passes)
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index b5408d19..29be6d8d 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -19,10 +19,9 @@
# Shared buffer allocation works out how to allocate the Ethos-U55 shared buffer for a given pass.
import numpy as np
-from .nn_graph import NpuBlockType
-from .numeric_util import round_up_divide, round_up
+
+from .operation import NpuBlockType
from .architecture_features import Block, Kernel, SHRAMElements, SharedBufferArea, ArchitectureFeatures
-from . import pass_packing
class SharedBufferAllocation:
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index c4b4cd9e..3fd29d12 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -18,12 +18,15 @@
# Description:
# Writes out per-pass and summary performance statistics to CSV files.
+import csv
+import sys
+
import numpy as np
-from .nn_graph import MemArea, TensorPurpose, PassPlacement
+
+from .tensor import MemArea, TensorPurpose
+from .nn_graph import PassPlacement
from .npu_performance import PassCycles, MacCount, BandwidthDirection
-import csv
from .numeric_util import round_up_to_int
-import sys
def write_summary_metrics_csv(nng, summary_filename, arch):
@@ -246,7 +249,7 @@ def print_performance_metrics_for_strat(
print(file=f)
for mem_area, label in mem_area_labels:
- if not mem_area in memory_used:
+ if mem_area not in memory_used:
continue
aug_label = label + " used"
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 5d0206cc..5cebf4d0 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -19,10 +19,11 @@
# Internal representation of a Neural Network Tensor.
import enum
-from . import numeric_util
-import numpy as np
-from . import data_type
import uuid
+
+import numpy as np
+
+from . import numeric_util
from .range_set import MemoryRangeSet
from .numeric_util import round_up_divide
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 94aa6088..255156e6 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -19,13 +19,14 @@
# Wrapping function to do tensor address allocation. That is, assigning addresses to tensors based on what has been
# worked out from the allowable overlaps that are calculated by the live range analysis.
-from . import live_range
-from .tensor import MemArea
import math
-from . import numeric_util
+
import numpy as np
-from .nn_graph import TensorAllocator, PassPlacement
+from . import live_range
+from . import numeric_util
+from .tensor import MemArea
+from .nn_graph import TensorAllocator
from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges
diff --git a/ethosu/vela/tflite_mapping.py b/ethosu/vela/tflite_mapping.py
index e2b90765..e8b40bdb 100644
--- a/ethosu/vela/tflite_mapping.py
+++ b/ethosu/vela/tflite_mapping.py
@@ -20,19 +20,11 @@
# Contains a mapping from the various TensorFlow Lite enums and options structs, generated by the FlatBuffer code
# generator, to Vela's internal format.
-import numpy as np
import struct
-from .data_type import DataType
-
-from .tflite.TensorType import TensorType
-from .tflite.BuiltinOperator import BuiltinOperator
-from .tflite.BuiltinOptions import BuiltinOptions
-
-
-from .tflite.Padding import Padding
-from .tflite.ActivationFunctionType import ActivationFunctionType
+import numpy as np
+from .data_type import DataType
from .tflite import Conv2DOptions
from .tflite import DepthwiseConv2DOptions
from .tflite import ConcatEmbeddingsOptions
@@ -132,6 +124,11 @@ from .tflite import ScatterNdOptions
from .tflite import SegmentSumOptions
from .tflite import SelectV2Options
from .tflite import WhileOptions
+from .tflite.TensorType import TensorType
+from .tflite.BuiltinOperator import BuiltinOperator
+from .tflite.BuiltinOptions import BuiltinOptions
+from .tflite.Padding import Padding
+from .tflite.ActivationFunctionType import ActivationFunctionType
def inverse_map(map):
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 535847d7..4456d5a0 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -18,14 +18,15 @@
# Description:
# Functions used to read from a TensorFlow Lite format file.
-from .tflite.Model import Model
-from .tflite.BuiltinOperator import BuiltinOperator
+import os.path
import numpy as np
-import os.path
-from .nn_graph import Graph, Operation, Subgraph
-from .tensor import Tensor, QuantizationParameters
+from .tflite.Model import Model
+from .tflite.BuiltinOperator import BuiltinOperator
+from .nn_graph import Graph, Subgraph
+from .operation import Operation
+from .tensor import Tensor, QuantizationParameters
from .tflite_mapping import builtin_operator_map, datatype_map, datatype_map_numpy, DataType
@@ -184,12 +185,7 @@ class TFLiteSubgraph:
class TFLiteGraph:
def __init__(
- self,
- filename,
- batch_size=1,
- feed_dict={},
- output_node_names=[],
- initialisation_nodes=[],
+ self, filename, batch_size=1, feed_dict={}, output_node_names=[], initialisation_nodes=[],
):
self.op_times = {}
@@ -238,15 +234,9 @@ class TFLiteGraph:
def read_tflite(
- filename,
- batch_size=1,
- feed_dict={},
- output_node_names=[],
- initialisation_nodes=[],
+ filename, batch_size=1, feed_dict={}, output_node_names=[], initialisation_nodes=[],
):
- tflite_graph = TFLiteGraph(
- filename, batch_size, feed_dict, output_node_names, initialisation_nodes
- )
+ tflite_graph = TFLiteGraph(filename, batch_size, feed_dict, output_node_names, initialisation_nodes)
nng = tflite_graph.nng
nng.refresh_after_modification()
return nng
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index f55d1ce5..1f072424 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -18,7 +18,13 @@
# Description:
# Functions used to write to a TensorFlow Lite format file. Supports adding in file identifiers.
+import numpy as np
import flatbuffers
+from flatbuffers.builder import UOffsetTFlags
+
+# ugh, the python flatbuffer interface is missing a method to add in file identifier. patching it in here:
+import flatbuffers.number_types as N
+from flatbuffers import encode
from .tflite import Tensor
from .tflite import QuantizationParameters
@@ -28,22 +34,14 @@ from .tflite import OperatorCode
from .tflite import Operator
from .tflite import Buffer
from .tflite import Metadata
-
-import numpy as np
-
from .tflite_mapping import datatype_inv_map, builtin_operator_inv_map, custom_prefix, BuiltinOperator
from .nn_graph import PassPlacement
from .tensor import TensorPurpose, MemArea
-from flatbuffers.builder import UOffsetTFlags
tflite_version = 3
tflite_file_identifier = "TFL" + str(tflite_version)
-import flatbuffers.number_types as N
-from flatbuffers import encode
-
-
def FinishWithFileIdentifier(self, rootTable, fid):
if fid is None or len(fid) != 4:
raise Exception("fid must be 4 chars")
@@ -163,8 +161,8 @@ class TFLiteSerialiser:
tf_code, opt_serializer = builtin_operator_inv_map[code]
except KeyError:
print(
- "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping, as a custom operation"
- % (code,)
+ "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping,"
+ "as a custom operation" % (code,)
)
tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index f07aec89..07772e66 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -20,11 +20,10 @@
#
# Provides command line interface, options parsing, and network loading. Before calling the compiler driver.
-import sys
-import os.path
import os
+import os.path
+import sys
import time
-import subprocess
import configparser
import argparse
import ast
@@ -37,7 +36,8 @@ from . import compiler_driver
from . import scheduler
from ._version import __version__
from .scheduler import ParetoMetric
-from .nn_graph import MemArea, TensorFormat, TensorAllocator, PassPlacement
+from .nn_graph import TensorAllocator, PassPlacement
+from .tensor import MemArea
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 92197248..ee554b5c 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -18,12 +18,11 @@
# Description:
# Compresses and pads the weigths. It also calculates the scales and packs with the biases.
-import os
-import sys
-import enum
import math
-import numpy as np
from collections import namedtuple
+
+import numpy as np
+
from .numeric_util import round_up
from .scaling import quantise_scale, reduced_quantise_scale
from .tensor import TensorPurpose, TensorSubPurpose, TensorFormat, TensorBlockTraversal
@@ -44,7 +43,7 @@ def encode(weight_stream):
# pad with 0xFF as needed so the length of the weight stream
# is a multiple of 16
-
+
while (len(compressed) % 16) != 0:
compressed.append(0xFF)
@@ -348,7 +347,7 @@ def update_pass_weight_and_scale_tensors(nng, arch):
for sg in nng.subgraphs:
for ps in sg.passes:
- if ps.weight_tensor != None:
+ if ps.weight_tensor is not None:
npu_usage_of_tensor = find_npu_usage_of_tensor(ps.weight_tensor)
if npu_usage_of_tensor == NpuBlockType.ConvolutionDepthWise:
ps.weight_tensor.quant_values = np.transpose(ps.weight_tensor.quant_values, (0, 1, 3, 2))
@@ -382,7 +381,7 @@ def update_pass_weight_and_scale_tensors(nng, arch):
src_tens.weight_compression_scales = ps.weight_tensor.weight_compression_scales
src_tens.weight_compressed_offsets = ps.weight_tensor.weight_compressed_offsets
- if ps.scale_tensor != None:
+ if ps.scale_tensor is not None:
rescale_for_faf = False
activation_ops = set(("Sigmoid", "Tanh"))
if (ps.ops[-1].type in activation_ops) and (ps.npu_block_type != NpuBlockType.ElementWise):