diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-26 09:20:43 +0000 |
---|---|---|
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-26 16:16:55 +0000 |
commit | 57ef0088d20dd708ff92222d244ea02f1e1e5216 (patch) | |
tree | ae11f55f6bac939a51d5182eae441d322efb3e0e /src/armnn/NetworkUtils.cpp | |
parent | 9272f8b9050096f39796227c5d89ed7b9905146d (diff) | |
download | armnn-57ef0088d20dd708ff92222d244ea02f1e1e5216.tar.gz |
IVGCVSW-4597 Modify BF16 optimizer to Convert only inputs and weights of
Convolution2d and FullyConnected layers
* Add InsertConvertFp32ToBf16LayersBefore
* Add ConvertWeight to ConvertFp32NetworkToBf16Impl for Conv2d and FullyConnected
* Allow different input and output when input is BF16 and output is FP32
for Conv2d and FullyConnected layers
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ic8f92ff28edcae08a72a3114a28f50c4619f919b
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 39 |
1 file changed, 39 insertions, 0 deletions
/// Inserts a ConvertFp32ToBf16Layer in front of each (eligible) input slot of
/// the given layer, so the layer receives BFloat16 data while its producers
/// keep emitting Float32.
///
/// @param graph                    Graph that owns the layers; new convert
///                                 layers are created and wired in via
///                                 Graph::InsertNewLayer.
/// @param layer                    The layer whose input connections get a
///                                 conversion inserted before them.
/// @param expectCorrectInputType   When true, a convert layer is only inserted
///                                 on inputs whose connected output slot
///                                 currently carries Float32 data; other inputs
///                                 are left untouched. When false, every input
///                                 slot gets a convert layer unconditionally.
/// @return Pointers to all ConvertFp32ToBf16Layer instances that were inserted
///         (owned by the graph, not the caller). May be empty if no input
///         qualified.
std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    // Upper bound: at most one convert layer per input slot.
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp32ToBf16Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
        }

        if (allowInsert)
        {
            // Name encodes the slot index so multiple inserts on one layer stay unique,
            // e.g. "convert_fp32_to_bf16-0-<layerName>".
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());

            // After insertion the convert layer's input is connected to the original
            // producer; copy that tensor info and retype it to BFloat16 for the
            // convert layer's output. Shape/quantisation fields are carried over as-is.
            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}