aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/mark_tensors.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/mark_tensors.py')
-rw-r--r--ethosu/vela/mark_tensors.py363
1 file changed, 363 insertions, 0 deletions
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
new file mode 100644
index 00000000..9b1824b5
--- /dev/null
+++ b/ethosu/vela/mark_tensors.py
@@ -0,0 +1,363 @@
+# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Description:
+# Mark purpose and select formats for Tensors. Also compresses the weights.
+
+from . import rewrite_graph
+from . import weight_compressor
+from .architecture_features import Block
+from .nn_graph import TensorPurpose, TensorFormat, PassPlacement
+from .operation import NpuBlockType
+
+
def purpose_from_list(lst):
    """Build an (op, idx) -> purpose function that indexes into *lst*.

    The returned callable ignores the op and simply returns lst[idx],
    so the list positionally describes the purpose of each input.
    """

    def lookup(op, idx):
        return lst[idx]

    return lookup
+
+
def all_fm(op, idx):
    """Mark every input of the op, at any index, as a feature map."""
    return TensorPurpose.FeatureMap
+
+
def all_parameter(op, idx):
    # NOTE(review): despite the name, this is identical to all_fm and returns
    # TensorPurpose.FeatureMap for every input. Possibly a deliberate fallback
    # until a dedicated parameter purpose exists — confirm against the
    # TensorPurpose enum before "fixing".
    return TensorPurpose.FeatureMap
+
+
def input0_from_output_rest_parameter(op, idx):
    """Input 0 inherits the purpose of the op's first output; all other
    inputs are marked as feature maps. Warns when the inherited purpose
    is still Unknown."""
    if idx != 0:
        return TensorPurpose.FeatureMap
    res = op.outputs[0].purpose
    if res == TensorPurpose.Unknown:
        print("Warning: Propagating unknown tensor purpose", op)
    return res
+
+
def inputs_from_output(op, idx):
    """Every input inherits the purpose of the op's first output.

    Emits a warning when that purpose is still Unknown, since it will be
    propagated as-is.
    """
    res = op.outputs[0].purpose
    if res == TensorPurpose.Unknown:
        print("Warning: Propagating unknown tensor purpose", op)
    return res
+
# Rule table mapping op-type names to an input-purpose function with
# signature (op, input_index) -> TensorPurpose. Lookup walks the table in
# order and stops at the first match; the final (None, all_fm) entry is the
# catch-all fallback for op types not listed anywhere above it.
tensor_purposes = [  # ops, input_purpose
    (
        {
            "Relu",
            "Relu6",
            "Mul",
            "Add",
            "Sub",
            "Rsqrt",
            "Abs",
            "Cast",
            "Exp",
            "Floor",
            "FloorDiv",
            "FloorMod",
            "SquaredDifference",
            "AddN",
            "BiasAdd",
            "RealDiv",
            "Maximum",
            "Minimum",
            "Sigmoid",
            "Tanh",
            "FusedBatchNorm",
            "AvgPool",
            "MaxPool",
            "Squeeze",
            "Softmax",
            "LRN",
            "Assign",
            "BatchMatMul",
            "ZerosLike",
            "ExtractImagePatches",
            "MulAct",
            "AddAct",
            "SubAct",
            "DivAct",
            "AvgPoolAct",
            "MaxPoolAct",
            "LeakyRelu",
        },
        all_fm,
    ),
    (
        {
            "Conv2D",
            "DepthwiseConv2dNative",
            "MatMul",
            "Conv2DBiasAct",
            "DepthwiseConv2dBiasAct",
            "FullyConnectedAct",
        },
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        {"Conv2DBackpropInputSwitched"},
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        {"QuantizedConv2D", "QuantizedMatMul"},
        # ifm, weights, then four quantization-parameter feature maps
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights] + [TensorPurpose.FeatureMap] * 4),
    ),
    (
        {
            "Reshape",
            "Min",
            "Max",
            "Mean",
            "Pad",
            "MirrorPad",
            "ArgMax",
            "ArgMin",
            "ExpandDims",
            "ResizeNearestNeighbor",
            "ResizeBilinear",
            "Tile",
            "Transpose",
            "Mfcc",
        },
        purpose_from_list([TensorPurpose.FeatureMap] * 2),
    ),
    (
        {"QuantizedReshape", "QuantizedResizeBilinear"},
        purpose_from_list([TensorPurpose.FeatureMap] * 4),
    ),
    (
        {"QuantizedBiasAdd", "QuantizedAdd", "QuantizedMul"},
        purpose_from_list([TensorPurpose.FeatureMap] * 6),
    ),
    (
        {
            "Dequantize",
            "Quantize",
            "QuantizeV2",
            "QuantizedRelu",
            "QuantizedRelu1",
            "QuantizedRelu6",
            "QuantizedAvgPool",
            "QuantizedMaxPool",
            "Slice",
            "SplitV",
        },
        purpose_from_list([TensorPurpose.FeatureMap] * 3),
    ),
    (
        {"BatchToSpaceND", "SpaceToBatchND", "DepthToSpaceND", "SpaceToDepthND"},
        purpose_from_list([TensorPurpose.FeatureMap] * 3),
    ),
    (
        {"BlockLSTM"},
        # weights sit at input index 4, everything else is a feature map
        purpose_from_list(
            [TensorPurpose.FeatureMap] * 4 + [TensorPurpose.Weights] + [TensorPurpose.FeatureMap] * 4
        ),
    ),
    ({"SplitSliceRead"}, purpose_from_list([TensorPurpose.FeatureMap] * 2)),
    ({"Shape", "ConcatSliceWrite", "AudioSpectrogram"}, purpose_from_list([TensorPurpose.FeatureMap])),
    (
        {"StridedSlice"},
        purpose_from_list([TensorPurpose.FeatureMap] * 4),
    ),
    ({"Fill", "Pack", "Range"}, all_parameter),
    (
        {"Requantize"},
        purpose_from_list([TensorPurpose.FeatureMap] * 5),
    ),
    # These op types take no inputs, hence the empty purpose list.
    ({"Placeholder", "SubgraphInput", "Const", "VariableV2"}, purpose_from_list([])),
    ({"FakeQuantWithMinMaxArgs", "FakeQuantWithMinMaxVars"}, input0_from_output_rest_parameter),
    (
        {"Square", "Sqrt", "Log", "Less", "Enter", "Exit", "Identity", "StopGradient", "Merge", "Switch"},
        inputs_from_output,
    ),
    (None, all_fm),
]
+
+
# Import-time sanity check: catch a stray bare string in the table, which
# iteration would silently decompose into its individual characters.
for ops, input_purpose in tensor_purposes:
    if ops is not None:
        for op in ops:
            assert len(op) > 1, "string literal has been decomposed"
+
+
def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False):
    """Assign a TensorPurpose and a memory area to every tensor in nng.

    Each subgraph is rewritten in pre-order, applying the per-op rules in
    tensor_purposes to the op's inputs; subgraph output tensors are always
    feature maps. Returns the (mutated) graph.
    """

    def mark_tensor_helper(tens, purpose):
        # Re-marking with the same purpose is a no-op; a conflicting purpose
        # is a hard error.
        if tens.purpose not in (TensorPurpose.Unknown, purpose):
            assert 0, "Cannot resolve tensor purpose %s and %s for tensor %s" % (tens.purpose, purpose, tens)
        tens.purpose = purpose
        tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]

        # special case constants, as they must be in permanent storage
        if len(tens.ops) == 1 and tens.ops[0].type == "Const":
            tens.mem_area = arch.permanent_storage_mem_area

    def rewrite_mark_tensor_purpose(op, arch):
        # find disconnected outputs and mark as parameters
        for tens in op.outputs:
            if not tens.consumers():
                mark_tensor_helper(tens, TensorPurpose.FeatureMap)

        # First matching table entry wins; the trailing (None, all_fm)
        # sentinel guarantees a match is always found.
        for candidate_ops, input_purpose in tensor_purposes:
            if candidate_ops is not None and op.type not in candidate_ops:
                continue
            if candidate_ops is None:
                print(
                    "warning: don't know how to mark up purpose for",
                    op.type,
                    op.inputs,
                    "triggering all feature map fallback",
                )
            for idx, tens in enumerate(op.inputs):
                mark_tensor_helper(tens, input_purpose(op, idx))
            break
        return op

    for sg in nng.subgraphs:
        sg = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [rewrite_mark_tensor_purpose])
        for tens in sg.output_tensors:
            mark_tensor_helper(tens, TensorPurpose.FeatureMap)

    if verbose_tensor_purpose:
        nng.print_graph_with_tensors()

    return nng
+
+
# Op types that only reshuffle or relabel data without computing new values.
reshape_operations = {
    "Reshape",
    "QuantizedReshape",
    "ExpandDims",
    "Squeeze",
    "BatchToSpaceND",
    "SpaceToBatchND",
    "DepthToSpaceND",
    "SpaceToDepthND",
    "Placeholder",
}
+
+
def mark_tensor_format(nng, arch, verbose_tensor_format=False):
    """Select a storage format for every tensor in nng and compress weights.

    Formats come from the architecture defaults for each tensor purpose.
    Tensors that end up as WeightsCompressed and carry values are run
    through the weight compressor, and, for DMA'd weights, the compressed
    data is aliased back onto the DMA source tensor.
    """
    formats_for_tensor = {}

    def init_tens(tens):
        # Purpose -> architecture default format; anything else is a bug.
        if tens.purpose == TensorPurpose.FeatureMap:
            return arch.default_feature_map_format
        if tens.purpose == TensorPurpose.Weights:
            return arch.default_weight_format
        assert 0, "unknown tensor purpose %s" % (tens.purpose,)

    def find_npu_usage_of_tensor(tens):
        # Chase through DMA ops to the real consumer to learn its block type.
        for op in tens.consumers():
            if op.type == "DMA":
                return find_npu_usage_of_tensor(op.outputs[0])
            if "npu_block_type" in op.attrs:
                return op.attrs["npu_block_type"]
        return NpuBlockType.Default

    def visit_tens(tens, ps):
        # First visit decides the format; later visits keep it.
        if tens not in formats_for_tensor:
            formats_for_tensor[tens] = init_tens(tens)

    for sg in nng.subgraphs:
        for ps in sg.passes:
            for tens in ps.outputs:
                visit_tens(tens, ps)
            for tens in ps.intermediates:
                visit_tens(tens, ps)
            for tens in ps.inputs:
                visit_tens(tens, ps)

    for tens, fmt in formats_for_tensor.items():
        tens.set_format(fmt, arch)
        if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
            npu_block_type = find_npu_usage_of_tensor(tens)
            if len(tens.ops) == 1 and tens.ops[0].type == "DMA":
                weight_compressor.compress_weights(tens, arch, npu_block_type, Block(32, 32, 32), 32)
                # Alias compressed weights back into source tensor
                src_tens = tens.ops[0].inputs[0]
                src_tens.compressed_values = tens.compressed_values
                src_tens.storage_shape = tens.storage_shape
                src_tens.brick_size = tens.brick_size
                src_tens.weight_compression_scales = tens.weight_compression_scales
                src_tens.weight_compressed_offsets = tens.weight_compressed_offsets
                src_tens.compression_scale_for_worst_weight_stream = tens.compression_scale_for_worst_weight_stream
                src_tens.storage_compression_scale = tens.storage_compression_scale

    if verbose_tensor_format:
        nng.print_passes_with_tensors()