diff options
author | Charles Xu <charles.xu@arm.com> | 2020-05-13 10:15:26 +0200 |
---|---|---|
committer | Tim Hall <tim.hall@arm.com> | 2020-06-18 17:53:52 +0100 |
commit | 78792223369fa34dacd0e69e189af035283da2ae (patch) | |
tree | ac3826df5528866319fd65d7a99eef8e87cd4084 /ethosu/vela/npu_serialisation.py | |
parent | 620d88c60482bad4d96da4d32cc4cca5561cca9e (diff) | |
download | ethos-u-vela-78792223369fa34dacd0e69e189af035283da2ae.tar.gz |
Add elementwise vector scalars support
Write the constant scalars into flash. In case it's Dram
or OffChipFlash, DMA the scalars from flash to sram.
Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: I42300a05dfe968d623b8aec8549644549e0f54b5
Diffstat (limited to 'ethosu/vela/npu_serialisation.py')
-rw-r--r-- | ethosu/vela/npu_serialisation.py | 30 |
1 file changed, 20 insertions, 10 deletions
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index b8ac20f3..0cb40ed0 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -46,6 +46,10 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
         memory_tensor.values[start_addr:end_addr] = compressed_values
         start_addr = end_addr
 
+def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
+    start_addr = src_tensor.address
+    end_addr = start_addr + src_tensor.quant_values.size
+    memory_tensor.values[start_addr:end_addr] = src_tensor.quant_values
 
 def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens):
     if sg.placement != PassPlacement.Npu:
@@ -90,16 +94,22 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens)
 
     for cps in sg.cascaded_passes:
         for ps in cps.passes:
-            if ps.placement == PassPlacement.Npu and ps.weight_tensor is not None:
-                # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
-                # is pointing at the destination address of where the weights should be placed in SRAM.
-                # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
-                if ps.weight_tensor.ops[0].type == "DMA":
-                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
-                else:
-                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
-
-                copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+            if ps.placement == PassPlacement.Npu:
+                if ps.weight_tensor != None:
+                    # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
+                    # is pointing at the destination address of where the weights should be placed in SRAM.
+                    # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
+                    if ps.weight_tensor.ops[0].type == "DMA":
+                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
+                    else:
+                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
+
+                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+
+                if ps.ifm_tensor != None and ps.ifm_tensor.mem_area != MemArea.Sram:
+                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm_tensor)
+                if ps.ifm2_tensor != None and ps.ifm2_tensor.mem_area != MemArea.Sram:
+                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm2_tensor)
 
     sg.command_stream_tensor = make_memory_tensor(
         sg.name + "_command_stream", flash_area, command_stream_size_bytes, True, arch