author    Louis Verhaard <louis.verhaard@arm.com>  2020-10-14 08:32:41 +0200
committer Louis Verhaard <louis.verhaard@arm.com>  2020-10-20 08:50:29 +0200
commit    17afa2837ad366f2da32e2bc0e2659ebb35bd1d5 (patch)
tree      7329fe546be4e2a95e205daf83637c7927bf7684
parent    6e827082524af57bf04833c30754384b46216e59 (diff)
download  ethos-u-vela-17afa2837ad366f2da32e2bc0e2659ebb35bd1d5.tar.gz
MLBEDSW-3268: Refactor mark_tensors
- Refactored mark_tensor_purpose
- Initial weight compression is now always done in insert_dma
- Removed mark_tensor_format

Change-Id: Ic719b9bcd1d27e1390d7b9ce8cd21795139ec814
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
-rw-r--r--  ethosu/vela/compiler_driver.py |   1
-rw-r--r--  ethosu/vela/insert_dma.py      |  24
-rw-r--r--  ethosu/vela/mark_tensors.py    | 329
-rw-r--r--  ethosu/vela/rewrite_graph.py   |  22
4 files changed, 76 insertions(+), 300 deletions(-)
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 6c1142d1..05bf65a4 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -147,7 +147,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
extract_npu_subgraphs.extract_npu_subgraphs(nng, arch)
- mark_tensors.mark_tensor_format(nng, arch, options.verbose_tensor_format)
assert verify_graph_health(nng)
if options.timing:
start = time.time()
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index 56d68d13..fc1e7986 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -26,6 +26,7 @@ from .weight_compressor import compress_weights
def weights_fit_sram(arch, op, tens, nng):
+ # Compresses weights and checks if they fit in SRAM
if tens.purpose != TensorPurpose.Weights:
return True
@@ -35,22 +36,17 @@ def weights_fit_sram(arch, op, tens, nng):
elif len(tens.shape) == 2:
min_weight_size = tens.shape[0] * arch.OFMSplitDepth
- # Need to be fit into Sram, as a double buffer
- # Only evaluate when the compression test limit will make it impossible to fit
- w_comp_test_limit = 2
- if (w_comp_test_limit * min_weight_size * 2) > arch.sram_size:
- # check worst compression ratio
- npu_block_type = op.attrs.get("npu_block_type", NpuBlockType.Default)
- compress_weights(arch, nng, tens, npu_block_type, 16, 16, op.get_dilation_h_w())
+ compress_weights(arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w())
- worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
- if worst_buffer_size > arch.sram_size:
- print(
- "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
- tens.name, worst_buffer_size
- )
+ # Need to be fit into Sram, as a double buffer
+ worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
+ if worst_buffer_size > arch.sram_size:
+ print(
+ "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
+ tens.name, worst_buffer_size
)
- return False
+ )
+ return False
return True
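Because this view collapses Python indentation, the following is roughly how weights_fit_sram reads after the patch. It is a best-effort reconstruction from the hunk above; the body of the 4-D shape branch is not visible in the hunk context and is an assumption.

def weights_fit_sram(arch, op, tens, nng):
    # Compresses weights and checks if they fit in SRAM
    if tens.purpose != TensorPurpose.Weights:
        return True

    min_weight_size = 0
    if len(tens.shape) == 4:
        # Branch not shown in the hunk context; assumed analogous to the 2-D case below
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth

    # Compression is no longer gated behind a size heuristic; it always runs here
    compress_weights(arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w())

    # Need to be fit into Sram, as a double buffer
    worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
    if worst_buffer_size > arch.sram_size:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, worst_buffer_size
            )
        )
        return False
    return True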
diff --git a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py
index 206d8365..1379628a 100644
--- a/ethosu/vela/mark_tensors.py
+++ b/ethosu/vela/mark_tensors.py
@@ -14,295 +14,82 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
-# Mark purpose and select formats for Tensors. Also compresses the weights.
-from . import rewrite_graph
-from . import weight_compressor
+# Mark purpose and select formats for Tensors.
from .errors import OperatorError
from .operation import CustomType
from .operation import Op
+from .rewrite_graph import visit_graph_post_order
from .tensor import MemType
from .tensor import TensorFormat
from .tensor import TensorPurpose
-def purpose_from_list(lst):
- def purpose(op, idx):
- return lst[idx]
-
- return purpose
-
-
-def all_fm(op, idx):
- return TensorPurpose.FeatureMap
-
-
-def all_parameter(op, idx):
- return TensorPurpose.FeatureMap
-
-
-def input0_from_output_rest_parameter(op, idx):
- if idx == 0:
- res = op.outputs[0].purpose
- if res == TensorPurpose.Unknown:
- print("Warning: Propagating unknown tensor purpose", op)
- return res
- return TensorPurpose.FeatureMap
-
-
-def inputs_from_output(op, idx):
- res = op.outputs[0].purpose
- if res == TensorPurpose.Unknown:
- print("Warning: Propagating unknown tensor purpose", op)
- return res
-
+def get_format(purpose, arch):
+ if purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT, TensorPurpose.Scratch):
+ fmt = arch.default_feature_map_format
+ elif purpose == TensorPurpose.Weights:
+ fmt = arch.default_weight_format
+ elif purpose == TensorPurpose.Unknown:
+ fmt = TensorFormat.Unknown
+ else:
+ assert 0, "unknown tensor purpose {}".format(purpose)
+ return fmt
+
+
+def mark_purpose(tens, arch, purpose):
+ # Sets tensor's purpose, format, mem_area and mem_type
+ if tens.purpose == TensorPurpose.Unknown:
+ tens.purpose = purpose
+ elif tens.purpose not in (purpose, TensorPurpose.LUT):
+ assert 0, "Cannot resolve tensor purpose {} and {} for tensor {}".format(tens.purpose, purpose, tens)
+ fmt = get_format(purpose, arch)
+ tens.set_format(fmt, arch)
+ tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]
+ tens.mem_type = arch.tensor_storage_mem_type[tens.purpose]
+
+ if len(tens.ops) == 1 and tens.ops[0].type == Op.Const:
+ tens.mem_area = arch.permanent_storage_mem_area # special case constants, as they must be in permanent storage
+ tens.mem_type = MemType.Permanent_NPU
+
+
+def rewrite_mark_tensor_purpose(op, arch):
+ # find disconnected outputs and mark as feature maps
+ for tens in op.outputs:
+ if not tens.consumers():
+ mark_purpose(tens, arch, TensorPurpose.FeatureMap)
+ weight_tensors = op.get_weight_tensors()
+ for tens in op.inputs:
+ if tens.purpose != TensorPurpose.Unknown:
+ purpose = tens.purpose
+ elif tens in weight_tensors:
+ purpose = TensorPurpose.Weights
+ else:
+ purpose = TensorPurpose.FeatureMap
+ mark_purpose(tens, arch, purpose)
+ if op.type == Op.Reshape:
+ # Reshape's input and output point to same data
+ op.ofm.mem_area = op.ifm.mem_area
-tensor_purposes = [ # ops, input_purpose
- (
- set(
- (
- Op.Relu,
- Op.Relu6,
- Op.Rsqrt,
- Op.Abs,
- Op.Cast,
- Op.Exp,
- Op.Floor,
- Op.FloorDiv,
- Op.FloorMod,
- Op.SquaredDifference,
- Op.AddN,
- Op.Maximum,
- Op.Minimum,
- Op.Sigmoid,
- Op.Tanh,
- Op.AvgPool,
- Op.MaxPool,
- Op.Squeeze,
- Op.Softmax,
- Op.LRN,
- Op.BatchMatMul,
- Op.ZerosLike,
- Op.Mul,
- Op.Add,
- Op.Sub,
- Op.Div,
- Op.LeakyRelu,
- Op.CLZ,
- Op.SHL,
- Op.SHR,
- Op.ReduceSum,
- )
- ),
- all_fm,
- ),
- (
- set((Op.Conv2D, Op.MatMul, Op.Conv2DBias, Op.DepthwiseConv2DBias, Op.FullyConnected,)),
- purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
- ),
- (
- set((Op.Conv2DBackpropInputSwitchedBias,)),
- purpose_from_list(
- [TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
- ),
- ),
- (
- set((Op.QuantizedConv2D, Op.QuantizedMatMul)),
- purpose_from_list(
- [
- TensorPurpose.FeatureMap,
- TensorPurpose.Weights,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- ]
- ),
- ),
- (
- set(
- (
- Op.Reshape,
- Op.Min,
- Op.Max,
- Op.Mean,
- Op.Pad,
- Op.MirrorPad,
- Op.ArgMax,
- Op.ArgMin,
- Op.ExpandDims,
- Op.ResizeNearestNeighbor,
- Op.ResizeBilinear,
- Op.Tile,
- Op.Transpose,
- )
- ),
- purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
- ),
- (
- set((Op.QuantizedReshape,)),
- purpose_from_list(
- [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
- ),
- ),
- (
- set((Op.Dequantize, Op.Quantize, Op.QuantizedAvgPool, Op.QuantizedMaxPool, Op.Slice, Op.SplitV,)),
- purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
- ),
- (
- set((Op.BatchToSpaceND, Op.SpaceToBatchND, Op.DepthToSpace, Op.SpaceToDepth)),
- purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
- ),
- (
- set((Op.BlockLSTM,)),
- purpose_from_list(
- [
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.Weights,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- TensorPurpose.FeatureMap,
- ]
- ),
- ),
- (set((Op.SplitSliceRead,)), purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap])),
- (set((Op.Shape, Op.ConcatSliceWrite)), purpose_from_list([TensorPurpose.FeatureMap])),
- (
- set((Op.StridedSlice,)),
- purpose_from_list(
- [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
- ),
- ),
- (set((Op.Fill, Op.Pack, Op.Range)), all_parameter),
- (set((Op.Placeholder, Op.SubgraphInput, Op.Const,)), purpose_from_list([])),
- (set((Op.FakeQuantWithMinMaxArgs,)), input0_from_output_rest_parameter),
- (set((Op.Square, Op.Sqrt, Op.Log, Op.Less, Op.Identity,)), inputs_from_output,),
- (None, all_fm),
-]
+ if op.type == Op.Custom and op.attrs.get("custom_type") == CustomType.ExistingNpuOp:
+ scratch_tensor = None
+ if len(op.inputs) >= 3:
+ scratch_tensor = op.inputs[2] # should be existing scratch tensor
+ if scratch_tensor.name.endswith("_scratch"):
+ scratch_tensor.purpose = TensorPurpose.Scratch
-for ops, input_purpose in tensor_purposes:
- if ops is None:
- continue
+ if scratch_tensor is None:
+ OperatorError(op, "Scratch tensor not found.")
def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False):
- def mark_tensor_helper(tens, purpose):
- if tens.purpose == TensorPurpose.Unknown or tens.purpose == purpose:
- tens.purpose = purpose
- elif tens.purpose != TensorPurpose.LUT:
- assert 0, "Cannot resolve tensor purpose %s and %s for tensor %s" % (tens.purpose, purpose, tens)
- tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]
- tens.mem_type = arch.tensor_storage_mem_type[tens.purpose]
-
- if len(tens.ops) == 1 and tens.ops[0].type == Op.Const:
- tens.mem_area = (
- arch.permanent_storage_mem_area
- ) # special case constants, as they must be in permanent storage
- tens.mem_type = MemType.Permanent_NPU
-
- def rewrite_mark_tensor_purpose(op, arch, nng):
- # find disconnected outputs and mark as parameters
- for tens in op.outputs:
- if not tens.consumers():
- mark_tensor_helper(tens, TensorPurpose.FeatureMap)
-
- for ops, input_purpose in tensor_purposes:
- if ops is None or op.type in ops:
- if ops is None:
- print(
- "Warning: Don't know how to mark up purpose for",
- op.type,
- op.inputs,
- "triggering all feature map fallback",
- )
-
- for idx, tens in enumerate(op.inputs):
- if tens is None:
- continue
- purpose = input_purpose(op, idx) if tens.purpose == TensorPurpose.Unknown else tens.purpose
- mark_tensor_helper(tens, purpose)
-
- if op.type == Op.Reshape:
- # Reshape's input and output point to same data
- op.outputs[0].mem_area = op.inputs[0].mem_area
-
- if op.type == Op.Custom and op.attrs.get("custom_type") == CustomType.ExistingNpuOp:
- scratch_tensor = None
-
- if len(op.inputs) >= 3:
- scratch_tensor = op.inputs[2] # should be existing scratch tensor
- if scratch_tensor.name.endswith("_scratch"):
- scratch_tensor.purpose = TensorPurpose.Scratch
-
- if scratch_tensor is None:
- OperatorError(op, "Scratch tensor not found.")
-
- break
-
- return op
-
+ # Sets purpose, format, mem_area and mem_type for all tensors in the graph
for sg in nng.subgraphs:
- sg = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [rewrite_mark_tensor_purpose])
+ visit_graph_post_order(sg.output_tensors, arch, [], [rewrite_mark_tensor_purpose])
for tens in sg.output_tensors:
- mark_tensor_helper(tens, TensorPurpose.FeatureMap)
+ mark_purpose(tens, arch, TensorPurpose.FeatureMap)
if verbose_tensor_purpose:
nng.print_graph_with_tensors()
return nng
-
-
-def mark_tensor_format(nng, arch, verbose_tensor_format=False):
- formats_for_tensor = {}
-
- def init_tens(tens):
- if tens.purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT):
- fmt = arch.default_feature_map_format
- elif tens.purpose == TensorPurpose.Weights:
- fmt = arch.default_weight_format
- elif tens.purpose == TensorPurpose.Scratch:
- fmt = arch.default_feature_map_format
- elif tens.purpose == TensorPurpose.Unknown:
- fmt = TensorFormat.Unknown
- else:
- assert 0, "unknown tensor purpose %s" % (tens.purpose,)
- return fmt
-
- def visit_tens(tens, ps):
- if tens not in formats_for_tensor:
- fmt = init_tens(tens)
- else:
- fmt = formats_for_tensor[tens]
-
- formats_for_tensor[tens] = fmt
-
- for sg in nng.subgraphs:
- for ps in sg.passes:
- for tens in ps.outputs:
- visit_tens(tens, ps)
- for tens in ps.intermediates:
- visit_tens(tens, ps)
- for tens in ps.inputs:
- visit_tens(tens, ps)
-
- for tens, fmt in formats_for_tensor.items():
- if len(tens.shape) > 4:
- continue
- tens.set_format(fmt, arch)
- if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
- src_tens = tens.get_dma_src_tensor()
- if src_tens is not None:
- op = tens.find_npu_op()
- if op is not None:
- weight_compressor.compress_weights(
- arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w()
- )
- # Alias compressed weights back into source tensor
- src_tens.copy_compressed_weight_info(tens)
-
- if verbose_tensor_format:
- nng.print_passes_with_tensors()
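Collecting the added lines of this hunk with indentation restored, the new mark_tensors.py essentially reduces to the following. This is a best-effort reconstruction: the whitespace is inferred from Python structure, not taken verbatim from the patch.

# Mark purpose and select formats for Tensors.
from .errors import OperatorError
from .operation import CustomType
from .operation import Op
from .rewrite_graph import visit_graph_post_order
from .tensor import MemType
from .tensor import TensorFormat
from .tensor import TensorPurpose


def get_format(purpose, arch):
    if purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT, TensorPurpose.Scratch):
        fmt = arch.default_feature_map_format
    elif purpose == TensorPurpose.Weights:
        fmt = arch.default_weight_format
    elif purpose == TensorPurpose.Unknown:
        fmt = TensorFormat.Unknown
    else:
        assert 0, "unknown tensor purpose {}".format(purpose)
    return fmt


def mark_purpose(tens, arch, purpose):
    # Sets tensor's purpose, format, mem_area and mem_type
    if tens.purpose == TensorPurpose.Unknown:
        tens.purpose = purpose
    elif tens.purpose not in (purpose, TensorPurpose.LUT):
        assert 0, "Cannot resolve tensor purpose {} and {} for tensor {}".format(tens.purpose, purpose, tens)
    fmt = get_format(purpose, arch)
    tens.set_format(fmt, arch)
    tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]
    tens.mem_type = arch.tensor_storage_mem_type[tens.purpose]

    if len(tens.ops) == 1 and tens.ops[0].type == Op.Const:
        # special case constants, as they must be in permanent storage
        tens.mem_area = arch.permanent_storage_mem_area
        tens.mem_type = MemType.Permanent_NPU


def rewrite_mark_tensor_purpose(op, arch):
    # find disconnected outputs and mark as feature maps
    for tens in op.outputs:
        if not tens.consumers():
            mark_purpose(tens, arch, TensorPurpose.FeatureMap)
    weight_tensors = op.get_weight_tensors()
    for tens in op.inputs:
        if tens.purpose != TensorPurpose.Unknown:
            purpose = tens.purpose
        elif tens in weight_tensors:
            purpose = TensorPurpose.Weights
        else:
            purpose = TensorPurpose.FeatureMap
        mark_purpose(tens, arch, purpose)
    if op.type == Op.Reshape:
        # Reshape's input and output point to same data
        op.ofm.mem_area = op.ifm.mem_area

    if op.type == Op.Custom and op.attrs.get("custom_type") == CustomType.ExistingNpuOp:
        scratch_tensor = None
        if len(op.inputs) >= 3:
            scratch_tensor = op.inputs[2]  # should be existing scratch tensor
            if scratch_tensor.name.endswith("_scratch"):
                scratch_tensor.purpose = TensorPurpose.Scratch
        if scratch_tensor is None:
            OperatorError(op, "Scratch tensor not found.")


def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False):
    # Sets purpose, format, mem_area and mem_type for all tensors in the graph
    for sg in nng.subgraphs:
        visit_graph_post_order(sg.output_tensors, arch, [], [rewrite_mark_tensor_purpose])
        for tens in sg.output_tensors:
            mark_purpose(tens, arch, TensorPurpose.FeatureMap)
    if verbose_tensor_purpose:
        nng.print_graph_with_tensors()
    return nng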
diff --git a/ethosu/vela/rewrite_graph.py b/ethosu/vela/rewrite_graph.py
index e71b228a..42acaf9b 100644
--- a/ethosu/vela/rewrite_graph.py
+++ b/ethosu/vela/rewrite_graph.py
@@ -82,14 +82,16 @@ def rewrite_graph_pre_order(nng, sg, arch, tensor_rewrite_list, op_rewrite_list,
return sg
-def visit_graph_post_order(sg, arch, tensor_visit_list, op_visit_list):
-
+def visit_graph_post_order(start_tensors, arch, tensor_visit_list, op_visit_list):
+ # Depth-first graph traversal, starting from the given list of tensors
+ # (typically a subgraph's output_tensors).
+ # Visits ops and tensors in input to output order.
op_visit_dict = dict()
tens_visit_dict = dict()
def visit_op(op):
if op in op_visit_dict:
- return op_visit_dict[op]
+ return
op_visit_dict[op] = op
for tens in op.inputs:
@@ -101,11 +103,9 @@ def visit_graph_post_order(sg, arch, tensor_visit_list, op_visit_list):
for tens in op.outputs:
visit_tens(tens)
- return op
-
def visit_tens(tens):
- if tens in tens_visit_dict:
- return tens_visit_dict[tens]
+ if tens is None or tens in tens_visit_dict:
+ return
tens_visit_dict[tens] = tens
@@ -115,15 +115,9 @@ def visit_graph_post_order(sg, arch, tensor_visit_list, op_visit_list):
for visit in tensor_visit_list:
visit(tens, arch)
- return tens
-
- for tens in sg.output_tensors:
+ for tens in start_tensors:
visit_tens(tens)
- sg.refresh_after_modification()
-
- return sg
-
def verify_graph_health(nng):
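With this change, visit_graph_post_order starts from an explicit tensor list and calls each tensor visitor as visit(tens, arch), so passes other than mark_tensor_purpose can reuse it in the same way. A minimal, hypothetical example follows; count_weight_tensors is illustrative only and not part of this patch.

from .rewrite_graph import visit_graph_post_order
from .tensor import TensorPurpose


def count_weight_tensors(nng, arch):
    # Hypothetical pass: visits every reachable op and tensor exactly once,
    # in input-to-output order, starting from each subgraph's output tensors
    weight_tensors = []

    def visit_tens(tens, arch):
        if tens.purpose == TensorPurpose.Weights:
            weight_tensors.append(tens)

    for sg in nng.subgraphs:
        visit_graph_post_order(sg.output_tensors, arch, [visit_tens], [])
    return len(weight_tensors)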