author    Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-20 15:01:01 +0000
committer Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-20 19:09:07 +0000
commit    bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e (patch)
tree      5187f34326414e7dfea80e0f4efaae5cbeb05d1d /src/armnn/NetworkUtils.cpp
parent    cf2ad554502830804e991aca2e5b0741623119b2 (diff)
download  armnn-bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e.tar.gz
IVGCVSW-4520 Implement BFloat16 Optimizer
* Add ReduceFp32ToBf16 to OptimizerOptions
* Add ConvertFp32NetworkToBf16
* Add utility functions to insert conversion layers
* Add constant conversion BF16 <-> FP32
* Unit tests

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Iaff77e20c721400b052cb37eb9ef6fe16d7abaff
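For context, the new option is consumed when a network is optimized. The following is a minimal usage sketch, not part of this patch: it assumes the armnn::Optimize overload of this era and the m_ReduceFp32ToBf16 flag that this change set introduces, plus a trivial FP32 network built inline.

#include <armnn/ArmNN.hpp>

// Hypothetical driver showing where ReduceFp32ToBf16 plugs in.
int main()
{
    using namespace armnn;

    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

    // Build a trivial FP32 network: input -> ReLU -> output.
    INetworkPtr network = INetwork::Create();
    ActivationDescriptor reluDesc;
    reluDesc.m_Function = ActivationFunction::ReLu;

    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* relu   = network->AddActivationLayer(reluDesc, "relu");
    IConnectableLayer* output = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
    relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info({1, 8}, DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(info);
    relu->GetOutputSlot(0).SetTensorInfo(info);

    // The flag added by this patch: run ConvertFp32NetworkToBf16, which
    // relies on the NetworkUtils helpers in the diff below to insert
    // ConvertFp32ToBf16/ConvertBf16ToFp32 layers where needed.
    OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToBf16 = true;

    IOptimizedNetworkPtr optNet =
        Optimize(*network, {Compute::CpuRef}, runtime->GetDeviceSpec(), optimizerOptions);
    return optNet ? 0 : 1;
}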
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r--  src/armnn/NetworkUtils.cpp  87
1 file changed, 85 insertions(+), 2 deletions(-)
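The "constant conversion BF16 <-> FP32" item refers to re-encoding weight tensors between the two formats. ArmNN's own BFloat16 helper class is not part of this file's diff; the sketch below only illustrates the underlying bit manipulation (round-to-nearest-even truncation on the way down, a 16-bit widening shift on the way up) and is an assumption about the technique, not a copy of the ArmNN implementation.

#include <cstdint>
#include <cstring>

// FP32 -> BF16: keep the top 16 bits of the IEEE 754 single-precision
// pattern, rounding the discarded low 16 bits to nearest-even.
// NaN payloads are not special-cased in this sketch.
uint16_t Fp32ToBf16(float value)
{
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    // 0x7FFF plus the lowest kept bit implements ties-to-even.
    const uint32_t rounding = 0x7FFFu + ((bits >> 16) & 1u);
    return static_cast<uint16_t>((bits + rounding) >> 16);
}

// BF16 -> FP32: widen losslessly by placing the 16-bit pattern in the
// high half of a 32-bit word; the low mantissa bits become zero.
float Bf16ToFp32(uint16_t value)
{
    const uint32_t bits = static_cast<uint32_t>(value) << 16;
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}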
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 1bbeaac005..8653a08510 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -16,7 +16,7 @@ namespace armnn
 namespace
 {
 
-void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
 {
     const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
     TensorInfo newTensorInfo(origTensorInfo);
@@ -24,19 +24,69 @@ void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
     outputSlot.SetTensorInfo(newTensorInfo);
 }
 
+void ChangeOutputBf16ToFp32(Layer& layer)
+{
+    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+    {
+        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
+        {
+            UpdateOutputSlotToFp32(*outputSlot);
+        }
+    }
+}
+
 void ChangeOutputFp16ToFp32(Layer& layer)
 {
     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
     {
         if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
         {
-            UpdateOutputSlotFp16ToFp32(*outputSlot);
+            UpdateOutputSlotToFp32(*outputSlot);
         }
     }
 }
 
 } // anonymous namespace
 
+std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType)
+{
+    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
+    convertLayers.reserve(layer.GetNumInputSlots());
+
+    // Insert a ConvertBf16ToFp32Layer before each input slot
+    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+    {
+        bool allowInsert = true;
+        if (expectCorrectInputType)
+        {
+            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
+            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+            allowInsert =
+                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
+        }
+
+        if (allowInsert)
+        {
+            const std::string name =
+                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+                layer.GetName();
+            ConvertBf16ToFp32Layer* convertLayer =
+                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float32);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                          Layer& layer,
                                                                          bool expectCorrectInputType)
@@ -76,6 +126,39 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
     return convertLayers;
 }
 
+std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
+{
+    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
+    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
+    convertLayers.reserve(numOutputSlots);
+
+    // Update Bf16 output slots to FP32 on current layer
+    ChangeOutputBf16ToFp32(layer);
+
+    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
+    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+    {
+        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+        {
+            const std::string name =
+                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+            ConvertFp32ToBf16Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::BFloat16);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
 {
     const unsigned int numOutputSlots = layer.GetNumOutputSlots();