aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwilisa01 <william.isaksson@arm.com>2023-02-08 09:56:14 +0000
committerFredrik Svedberg <fredrik.svedberg@arm.com>2023-02-15 15:25:25 +0000
commit46c9477ded912f26ddf0a761c728d23f7d616004 (patch)
tree52dfd6f42239da31a6c498795756779ae2ff150d
parent16b5e5e2d79564c7bb82ce6750cb2aec205d6621 (diff)
downloadethos-u-vela-46c9477ded912f26ddf0a761c728d23f7d616004.tar.gz
MLBEDSW-7211: Convert fixup_asymmetric_weights to supported ops check
Changed default behaviour to place int8 ops with asymmetric quantization on the CPU, and added an option to force symmetric quantization.

Change-Id: Ib9b717aaf61eae78833254ca3dfa745f4f253dc6
Signed-off-by: wilisa01 <william.isaksson@arm.com>
-rw-r--r--OPTIONS.md9
-rw-r--r--ethosu/vela/compiler_driver.py8
-rw-r--r--ethosu/vela/graph_optimiser.py6
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py49
-rw-r--r--ethosu/vela/vela.py8
5 files changed, 63 insertions, 17 deletions
diff --git a/OPTIONS.md b/OPTIONS.md
index 9a26e2fb..618b9dec 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -126,6 +126,15 @@ scheduling. Prints the results to standard out.
vela network.tflite --timing
```
+### Force Symmetric Int Weights
+
+Forces symmetric quantization for signed integer weights. This means that all weight zero points
+are set to 0. This might lead to unintended behaviour.
+
+```bash
+vela network.tflite --force-symmetric-int-weights
+```
+
### Accelerator Configuration
Choose which hardware accelerator configuration to compile for. Format is
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 439b954a..d2892096 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -65,6 +65,7 @@ class CompilerOptions:
show_cpu_operations=False,
tensor_allocator=TensorAllocator.Greedy,
timing=False,
+ force_symmetric_int_weights=False,
output_dir="outputs",
cpu_tensor_alignment=Tensor.AllocationQuantum,
hillclimb_max_iterations=None,
@@ -84,6 +85,7 @@ class CompilerOptions:
self.show_cpu_operations = show_cpu_operations
self.tensor_allocator = tensor_allocator
self.timing = timing
+ self.force_symmetric_int_weights = force_symmetric_int_weights
self.output_dir = output_dir
self.cpu_tensor_alignment = cpu_tensor_alignment
self.hillclimb_max_iterations = hillclimb_max_iterations
@@ -157,7 +159,9 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
for sg in nng.subgraphs:
visit_graph_post_order(sg.output_tensors, arch, [], [_record_operator])
- nng = graph_optimiser.optimise_graph(nng, arch, network_type, options.verbose_graph)
+ nng = graph_optimiser.optimise_graph(
+ nng, arch, network_type, options.verbose_graph, options.force_symmetric_int_weights
+ )
assert verify_graph_health(nng)
if options.verbose_quantization:
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 4505cf58..984a0167 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -25,13 +25,13 @@ from .tflite_graph_optimiser import tflite_optimise_graph
from .tosa_graph_optimiser import tosa_optimise_graph
-def optimise_graph(nng, arch, network_type, verbose_graph=False):
+def optimise_graph(nng, arch, network_type, verbose_graph=False, force_symmetric_int_weights=False):
if verbose_graph:
nng.print_graph("Before Graph Optimization")
if network_type == NetworkType.TFLite:
# TensorFlow Lite graph optimization
- nng = tflite_optimise_graph(nng, arch)
+ nng = tflite_optimise_graph(nng, arch, force_symmetric_int_weights)
else:
# TOSA graph optimization
nng = tosa_optimise_graph(nng, arch)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 2026892e..3a49309d 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1583,16 +1583,42 @@ def fixup_bias_tensors(op, arch, nng, dtype=None):
return op
-def fixup_asymmetric_weights(op, arch, nng):
- if op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op()):
- if op.ifm.dtype == DataType.int8:
+def detect_asymmetric_weights(op):
+ # Check all ops (cpu and npu)
+ if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op():
+ if op.ifm.dtype in (DataType.int8, DataType.int16):
if not np.all(op.weights.quantization.zero_point == 0):
- print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.")
- op.weights.quantization.zero_point *= 0
+ print(f"Warning: Op {op.type} '{op.name}' has asymmetric weights.", end=" ")
+ return True
+ return False
+
+
+def fixup_asymmetric_weights(op, arch, nng):
+ if detect_asymmetric_weights(op):
+ if op.run_on_npu:
+ print("Zero points have been adjusted.")
+ op.weights.quantization.zero_point *= 0
+ return op
+
+def check_asymmetric_weights(op, arch, nng):
+ # This function can modify the run_on_npu flag, which causes an operator to be placed on the CPU. It is usually only
+ # set by the supported operator checks. Therefore, it should be run immediately after those checks to avoid the
+ # possibility of other graph optimiser functions modifying an operator that is later run on the CPU.
+ if detect_asymmetric_weights(op):
+ if op.run_on_npu:
+ print("To run the operator on Ethos-U use the option --force-symmetric-int-weights")
+ op.run_on_npu = False
return op
+def fixup_or_check_asymmetric_weights(force_symmetric_int_weights):
+ if force_symmetric_int_weights:
+ return fixup_asymmetric_weights
+ else:
+ return check_asymmetric_weights
+
+
def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
if op.type == Op.Mean and op.run_on_npu:
inp, axis = op.inputs
@@ -1871,9 +1897,13 @@ def supported_operator_check(op, arch, nng):
return op
-def tflite_optimise_graph(nng, arch):
+def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
# Compile time static optimisations
- optimisation_list = [optimise_quantize, convert_shape_op_to_constant_tensor]
+ optimisation_list = [
+ optimise_quantize,
+ convert_shape_op_to_constant_tensor,
+ fixup_or_check_asymmetric_weights(force_symmetric_int_weights),
+ ]
for idx, sg in enumerate(nng.subgraphs):
nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
@@ -1886,10 +1916,7 @@ def tflite_optimise_graph(nng, arch):
)
# Pre-processing step
- pre_process_list = [
- supported_operator_check,
- set_ifm_ofm_op_shapes,
- ]
+ pre_process_list = [supported_operator_check, set_ifm_ofm_op_shapes]
for idx, sg in enumerate(nng.subgraphs):
nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index b5dfd80c..c44c7894 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -377,6 +377,11 @@ def main(args=None):
)
parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
parser.add_argument(
+ "--force-symmetric-int-weights",
+ action="store_true",
+ help="Forces all zero points to 0 for signed integer weights",
+ )
+ parser.add_argument(
"--accelerator-config",
type=str,
default="ethos-u55-256",
@@ -553,6 +558,7 @@ def main(args=None):
show_cpu_operations=args.show_cpu_operations,
tensor_allocator=args.tensor_allocator,
timing=args.timing,
+ force_symmetric_int_weights=args.force_symmetric_int_weights,
output_dir=args.output_dir,
cpu_tensor_alignment=args.cpu_tensor_alignment,
hillclimb_max_iterations=args.hillclimb_max_iterations,