diff options
author | Manupa Karunaratne <manupa.karunaratne@arm.com> | 2020-07-20 12:05:32 +0100 |
---|---|---|
committer | Manupa Karunaratne <manupa.karunaratne@arm.com> | 2020-07-27 13:28:03 +0100 |
commit | d83d2e11d3dff5031fec513ca2aa22c19c9ea4d8 (patch) | |
tree | 0603685c456b736fe9ed9faf44df65fa89a8af03 /ethosu/vela/weight_compressor.py | |
parent | ebe26c7af49ad277df347dac7c4c05959d83f387 (diff) | |
download | ethos-u-vela-d83d2e11d3dff5031fec513ca2aa22c19c9ea4d8.tar.gz |
[EXTAPI] refactor weight compression to be used by an external consumer
*lint
*added unit tests
*added typecheck
*added docstring for the api
Change-Id: Ibd4bc40d4381ac40ad2ea3d500b26c4ec565ab07
Signed-off-by: Manupa Karunaratne <manupa.karunaratne@arm.com>
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
-rw-r--r-- | ethosu/vela/weight_compressor.py | 85 |
1 file changed, 70 insertions(+), 15 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 8ebd7511..687a0805 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -20,7 +20,10 @@ from collections import namedtuple
 import numpy as np
 
+from .architecture_features import Accelerator
+from .architecture_features import ArchitectureFeatures
 from .data_type import DataType
+from .errors import typecheck
 from .errors import UnsupportedFeatureError
 from .nn_graph import SchedulingStrategy
 from .numeric_util import round_up
@@ -42,6 +45,55 @@ WeightCompressionConfig = namedtuple(
 )
 
 
+@typecheck
+def encode_weights(
+    accelerator: Accelerator,
+    weights_volume: np.ndarray,
+    dilation_xy: tuple,
+    ifm_bitdepth: int,
+    ofm_block_depth: int,
+    is_depthwise: bool,
+    is_partkernel: bool,
+):
+    """
+    Public facing API to use the ethosu weight encoding.
+
+    :param accelerator: architecture_features.Accelerator enum to pick the correct ethosu accelerator
+    :param weights_volume: numpy.ndarray in OHWI layout with a shape of four
+    :param dilation_xy: a two element tuple of dilation attributes in x,y dimension
+    :param ifm_bitdepth: the bitdepth of input feature map
+    :param ofm_block_depth: the depth of blocks for ethosu processing
+    :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal
+    :param is_partkernel: a boolean indicating these weights are traversed on sub-kernel basis
+    :return: a bytearray of compressed weights
+    """
+
+    # Checks for weight layout
+    assert len(weights_volume.shape) == 4, "weights ndarray should have a shape of 4"
+
+    # It cannot be both partkernel and depthwise
+    assert not (is_depthwise and is_partkernel), "encode_weights :: partkernel and depthwise are mutually exclusive"
+
+    # Check valid values for dilation
+    assert dilation_xy[0] in (1, 2), "encode_weights :: dilation x should be 1 or 2 not {}".format(dilation_xy[0])
+    assert dilation_xy[1] in (1, 2), "encode_weights :: dilation y should be 1 or 2 not {}".format(dilation_xy[1])
+
+    ifm_ublock = ArchitectureFeatures.accelerator_configs[accelerator].ifm_ublock
+    ofm_ublock = ArchitectureFeatures.accelerator_configs[accelerator].ofm_ublock
+    raw_stream = generate_brick(
+        ifm_ublock=ifm_ublock,
+        ofm_ublock=ofm_ublock,
+        brick_weights=weights_volume,
+        ofm_block_depth=ofm_block_depth,
+        is_depthwise=is_depthwise,
+        is_partkernel=is_partkernel,
+        ifm_bitdepth=ifm_bitdepth,
+        dilation=dilation_xy,
+    )
+    encoded_stream = encode(raw_stream)
+    return encoded_stream
+
+
 def create_weight_compression_config(tens, npu_block_type, ofm_block_depth, ofm_depth_step, dilation):
     # Note: for an ofm block only its depth is used in weight compression.
     # And block depth > ofm depth gives same result as block depth == ofm depth
@@ -93,13 +145,12 @@ def encode(weight_stream):
     return compressed
 
 
-def generate_brick(arch, brick_weights, ofm_block_depth, block_traversal, ifm_bitdepth, dilation):
-    is_depthwise = block_traversal == TensorBlockTraversal.DepthWise
-    is_partkernel = block_traversal == TensorBlockTraversal.PartKernelFirst
-    decomp_h = arch.subkernel_max.height // dilation[0]
-    decomp_w = arch.subkernel_max.width // dilation[1]
-    ofm_ublock = arch.ofm_ublock
-    ifm_ublock = arch.ifm_ublock
+def generate_brick(
+    ifm_ublock, ofm_ublock, brick_weights, ofm_block_depth, is_depthwise, is_partkernel, ifm_bitdepth, dilation
+):
+
+    decomp_h = ArchitectureFeatures.SubKernelMax.height // dilation[0]
+    decomp_w = ArchitectureFeatures.SubKernelMax.width // dilation[1]
     # Expect weights formatted OHWI
     ofm_depth = brick_weights.shape[-4]
     ifm_depth = brick_weights.shape[-1]
@@ -245,6 +296,9 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
     else:
         tens.block_traversal = TensorBlockTraversal.DepthFirst
 
+    is_depthwise = tens.block_traversal == TensorBlockTraversal.DepthWise
+    is_partkernel = tens.block_traversal == TensorBlockTraversal.PartKernelFirst
+
     if tens.consumer_list[0].type == "Conv2DBackpropInputSwitchedBias":
         # Transpose Convolution, reverse weights in H and W axes
         weights = np.flip(weights, axis=(0, 1))
@@ -262,7 +316,6 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
         substream_offsets = [0]
 
         encoded_stream = []
-        raw_size = 0
 
         # For each core, deinterleave weights from the larger volume
         # and generate separate compressed streams.
@@ -270,15 +323,17 @@ def compress_weights(arch, nng, tens, npu_block_type, ofm_block_depth, ofm_depth
             core_weights = core_deinterleave(brick_weights, core, arch.ncores)
             block_depth = (ofm_block_depth + arch.ncores - 1 - core) // arch.ncores
 
+            encoded_substream = []
             if block_depth != 0:
-                raw_stream = generate_brick(
-                    arch, core_weights, block_depth, tens.block_traversal, ifm_bitdepth, dilation
+                encoded_substream = encode_weights(
+                    accelerator=arch.accelerator_config,
+                    weights_volume=core_weights,
+                    dilation_xy=dilation,
+                    ifm_bitdepth=ifm_bitdepth,
+                    ofm_block_depth=block_depth,
+                    is_depthwise=is_depthwise,
+                    is_partkernel=is_partkernel,
                 )
-            else:
-                raw_stream = []
-
-            raw_size += len(raw_stream)
-            encoded_substream = encode(raw_stream)
             encoded_stream.extend(encoded_substream)
             substream_offsets.append(len(encoded_stream))