diff options
author | Tim Hall <tim.hall@arm.com> | 2021-06-08 21:25:57 +0100 |
---|---|---|
committer | Tim Hall <tim.hall@arm.com> | 2021-06-08 21:25:57 +0100 |
commit | d784af7e8995a10fb403157af48371699c35bbfe (patch) | |
tree | bf40b35b030d560049cef9411293b51e3d70ff4a /ethosu/vela/high_level_command_to_npu_op.py | |
parent | 225e19d3640288e991475ee4c49cb3ffd83cc83b (diff) | |
download | ethos-u-vela-d784af7e8995a10fb403157af48371699c35bbfe.tar.gz |
MLBEDSW-4602: Fix Deepspeech scale & bias reuse issue.
- Deepspeech reuses identical weights and biases throughout
the network. Since biases are now interleaved with weights
there is a scaling issue when the ifm scales differ between
operations using the same weight and scale tensor.
- This commit uses interleaved weights/scales on their first use
but separates scales to source memory on subsequent use (if
the ifm scale is different).
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I7aae163438160a919cae04e235966e75355a6148
Diffstat (limited to 'ethosu/vela/high_level_command_to_npu_op.py')
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 23 |
1 file changed, 17 insertions, 6 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 4ef7bee8..80d0e476 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -267,11 +267,14 @@ def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_sh return fm -def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures) -> List[NpuAddressRange]: +def create_weights( + weight_tensor: Tensor, weight_box: Box, scale_tensor: Tensor, arch: ArchitectureFeatures +) -> List[NpuAddressRange]: """Returns address ranges for weights and scales""" weights = [] biases = [] - region = get_region(weight_tensor.mem_type, arch) + shared_region = get_region(weight_tensor.mem_type, arch) + scale_region = scale_tensor and get_region(scale_tensor.mem_type, arch) w_tensor_src = weight_tensor if weight_tensor.src_tensor: @@ -300,11 +303,19 @@ def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFea # Location of weights in tensor addr_range = NpuAddressRange( - region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16) + shared_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16) ) weights.append(addr_range) - # Location of biases in tensor - addr_range = NpuAddressRange(region, int(address), round_up(int(weight_range.scale_bytes), 16)) + + # Location of standalone scales or combined weights tensor scales + if scale_tensor: + assert scale_tensor.src_tensor is None # Must be standalone + scale_range = scale_tensor.encoded_ranges[key] + address = scale_tensor.address + scale_range.offset + addr_range = NpuAddressRange(scale_region, int(address), round_up(int(scale_range.scale_bytes), 16)) + else: + addr_range = NpuAddressRange(shared_region, int(address), round_up(int(weight_range.scale_bytes), 16)) + biases.append(addr_range) return weights, biases @@ -351,7 +362,7 @@ def 
set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor) if cmd.weight_tensor is not None: - npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, arch) + npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch) npu_op.activation = create_npu_activation(op) npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops) npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize) |