author     Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>    2019-11-13 15:16:28 +0000
committer  Áron Virginás-Tar <aron.virginas-tar@arm.com>    2019-11-15 16:54:47 +0000
commit     87972be8d838f6fde6f6e98dd81c422e85457a5e (patch)
tree       78e8a9abfefc6db67f9a71f6c1fddb0444daac5f
parent     5716de25c6981d004e32b81dc65b4869eda25f7c (diff)
IVGCVSW-4119 Fix FP16 to FP32 fallback mechanism in optimizer to work with Dequantize
* Check for output data type as well as input data type when determining
  whether we should attempt to fall back to FP32 if FP16 is not supported
* Override output type for Dequantize in IsLayerSupported() instead of input type
* Updated original input type from FP16 to FP32 in InsertConvertFp32ToFp16LayersAfter()

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Ic6477fd17cea5a91bd8bf9ae0cf836520897d5b7
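In essence, the optimizer now inspects both the input and the output data type of a layer before deciding whether the FP16-to-FP32 fallback applies. A minimal sketch of that check, assuming the internal armnn headers are available; the helper name HasFp16InputOrOutput is hypothetical and simply mirrors the logic added to AssignBackends in Network.cpp below:

    // Hypothetical helper, not part of the patch; mirrors the new check in
    // AssignBackends(). Assumes the internal Layer.hpp and armnn/Types.hpp.
    bool HasFp16InputOrOutput(const armnn::Layer& layer)
    {
        using armnn::DataType;

        // Layers without inputs or outputs are treated as FP32, as in Network.cpp.
        const DataType dataTypeIn = layer.GetNumInputSlots() == 0 ? DataType::Float32 :
            layer.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
        const DataType dataTypeOut = layer.GetNumOutputSlots() == 0 ? DataType::Float32 :
            layer.GetOutputSlot(0).GetTensorInfo().GetDataType();

        // A Dequantize layer with a quantized input and an FP16 output is now
        // caught via dataTypeOut; previously only the input type was considered.
        return dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16;
    }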
-rw-r--r--  src/armnn/Network.cpp                                 |  32
-rw-r--r--  src/armnn/NetworkUtils.cpp                            | 114
-rw-r--r--  src/armnn/NetworkUtils.hpp                            |   4
-rw-r--r--  src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp  |   5
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.cpp       |   4
5 files changed, 102 insertions, 57 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 573f6a19e8..1797baf78e 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -71,8 +71,6 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
return m_Graph->SerializeToDot(stream);
}
-
-
void ReportError(const std::string& errorMessage,
Optional<std::vector<std::string>&> errorMessages)
{
@@ -166,7 +164,12 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
for (auto it = firstLayer; it != lastLayer; ++it)
{
auto layer = *it;
- DataType dataType = layer->GetDataType();
+
+ DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
+ layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
+ DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
+ layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
+
std::string reasonIfUnsupported;
bool found = false;
if (!CheckScaleSetOnQuantizedType(layer, errMessages))
@@ -181,21 +184,29 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
// need to set the compute device on the layer
// before we can check if it is supported
layer->SetBackendId(backend);
- if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
+ if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
{
- if (dataType == DataType::Float16)
+ if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
{
if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
&& layer->GetType() != LayerType::ConvertFp32ToFp16
&& layer->GetType() != LayerType::ConvertFp16ToFp32)
{
// Insert FP16 -> FP32 conversion layer before current layer
- std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
- InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
+ std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
+ if (dataTypeIn == DataType::Float16)
+ {
+ convertFp16ToFp32Layers =
+ InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
+ }
// Insert FP32 -> FP16 conversion layer after current layer
- std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
- InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
+ std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
+ if (dataTypeOut == DataType::Float16)
+ {
+ convertFp32ToFp16Layers =
+ InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
+ }
// Assign a supported backend to the newly introduced conversion layers
auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
@@ -258,7 +269,8 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
std::stringstream warningMsg;
warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
<< " is not supported on requested backend " << layer->GetBackendId().Get()
- << " for data type " << GetDataTypeName(dataType)
+ << " for input data type " << GetDataTypeName(dataTypeIn)
+ << " and output data type " << GetDataTypeName(dataTypeOut)
<< " (reason: " << reasonIfUnsupported
<< "), falling back to the next backend.";
ReportWarning(warningMsg.str(), errMessages);
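To make the effect of the change above concrete: for a Dequantize layer that a backend only supports with an FP32 output, only dataTypeOut is Float16, so only the conversion after the layer is inserted. A commented sketch, not part of the patch; QAsymm8 stands in for whatever quantized type the input actually uses:

    // before:  ... (QAsymm8) -> Dequantize -> (Float16) ...
    // after:   ... (QAsymm8) -> Dequantize -> (Float32) -> ConvertFp32ToFp16 -> (Float16) ...
    if (dataTypeIn == DataType::Float16)   // false for Dequantize: its input is quantized
    {
        convertFp16ToFp32Layers = InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
    }
    if (dataTypeOut == DataType::Float16)  // true when Dequantize produces FP16
    {
        convertFp32ToFp16Layers = InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
    }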
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index cfed6680ca..1bbeaac005 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -13,67 +13,97 @@
namespace armnn
{
-std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer)
+namespace
{
- std::vector<ConvertFp16ToFp32Layer*> convertLayers;
- convertLayers.reserve(layer.GetNumInputSlots());
-
- for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
- {
- // Insert FP16 to FP32 converter layer before the layer
- const std::string name =
- std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") + layer.GetName();
- ConvertFp16ToFp32Layer* convertLayer =
- graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
-
- // Sets output tensor info for the convert layer
- TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
- convertInfo.SetDataType(DataType::Float32);
- convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+{
+ const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
+ TensorInfo newTensorInfo(origTensorInfo);
+ newTensorInfo.SetDataType(DataType::Float32);
+ outputSlot.SetTensorInfo(newTensorInfo);
+}
- convertLayers.emplace_back(convertLayer);
+void ChangeOutputFp16ToFp32(Layer& layer)
+{
+ for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+ {
+ if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
+ {
+ UpdateOutputSlotFp16ToFp32(*outputSlot);
+ }
}
+}
- // Sets the output tensor info for the unsupported layer
- auto UpdateTensorInfo = [](auto& outputSlot)
- {
- // Copy original tensor info and change data type to FP32
- TensorInfo newTensorInfo = outputSlot.GetTensorInfo();
- newTensorInfo.SetDataType(DataType::Float32);
+} // anonymous namespace
- outputSlot.SetTensorInfo(newTensorInfo);
- };
+std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
+ Layer& layer,
+ bool expectCorrectInputType)
+{
+ std::vector<ConvertFp16ToFp32Layer*> convertLayers;
+ convertLayers.reserve(layer.GetNumInputSlots());
- std::for_each(layer.BeginOutputSlots(), layer.EndOutputSlots(), UpdateTensorInfo);
+ // Insert a ConvertFp16ToFp32Layer before each input slot
+ for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+ {
+ bool allowInsert = true;
+ if (expectCorrectInputType)
+ {
+ // Only insert ConvertFp16ToFp32Layer before FP16 input slots
+ OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+ allowInsert =
+ connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
+ }
+
+ if (allowInsert)
+ {
+ const std::string name =
+ std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+ layer.GetName();
+ ConvertFp16ToFp32Layer* convertLayer =
+ graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
+
+ TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ convertInfo.SetDataType(DataType::Float32);
+
+ convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+ convertLayers.emplace_back(convertLayer);
+ }
+ }
return convertLayers;
}
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
+ const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
std::vector<ConvertFp32ToFp16Layer*> convertLayers;
- convertLayers.reserve(layer.GetNumOutputSlots());
+ convertLayers.reserve(numOutputSlots);
- int index = 0;
- // Change outputs to DataType::Float16
- for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
- {
- BOOST_ASSERT(outputSlot->GetTensorInfo().GetDataType() == DataType::Float32);
+ // Update FP16 output slots to FP32 on current layer
+ ChangeOutputFp16ToFp32(layer);
- // Insert FP32 to FP16 converter layer after the layer
- const std::string name =
- std::string("convert_fp32_to_fp16-" + std::to_string(index++) + "-") + layer.GetName();
- ConvertFp32ToFp16Layer* convertLayer =
- graph.InsertNewLayer<ConvertFp32ToFp16Layer>(*outputSlot, name.c_str());
+ // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
+ for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+ {
+ OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+ if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+ {
+ const std::string name =
+ std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+ ConvertFp32ToFp16Layer* convertLayer =
+ graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
- // Sets output tensor info for the convert layer.
- TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
- convertInfo.SetDataType(DataType::Float16);
+ TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ convertInfo.SetDataType(DataType::Float16);
- convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+ convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
- convertLayers.emplace_back(convertLayer);
+ convertLayers.emplace_back(convertLayer);
+ }
}
return convertLayers;
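The reworked InsertConvertFp32ToFp16LayersAfter first rewrites any FP16 output slot of the layer to FP32 and then appends a converter for each FP32 output, so downstream consumers keep receiving FP16 data. A hypothetical test-style sketch of that behaviour, assuming the internal armnn headers (Graph.hpp, the layer headers and NetworkUtils.hpp) are available:

    using namespace armnn;

    Graph graph;
    const TensorInfo fp16Info({ 1, 4 }, DataType::Float16);

    auto* input  = graph.AddLayer<InputLayer>(0, "input");
    auto* act    = graph.AddLayer<ActivationLayer>(ActivationDescriptor(), "act");
    auto* output = graph.AddLayer<OutputLayer>(0, "output");

    input->GetOutputSlot().SetTensorInfo(fp16Info);
    act->GetOutputSlot().SetTensorInfo(fp16Info);

    input->GetOutputSlot().Connect(act->GetInputSlot(0));
    act->GetOutputSlot().Connect(output->GetInputSlot(0));

    // The activation's FP16 output is rewritten to FP32 and a single converter
    // is appended, so the output layer still sees Float16 data.
    auto converters = InsertConvertFp32ToFp16LayersAfter(graph, *act);

    BOOST_ASSERT(converters.size() == 1);
    BOOST_ASSERT(act->GetOutputSlot().GetTensorInfo().GetDataType() == DataType::Float32);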
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index 421c52a6a7..38fb22350d 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -11,7 +11,9 @@
namespace armnn
{
-std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer);
+std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
+ Layer& layer,
+ bool expectCorrectInputType = true);
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer);
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
index 729b76ad6b..9658a35560 100644
--- a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
+++ b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
@@ -15,7 +15,6 @@ namespace optimizations
class ConvertFp32NetworkToFp16Impl
{
public:
-
void Run(Graph& graph, Layer& layer) const
{
if(layer.GetType() == LayerType::Input)
@@ -33,7 +32,9 @@ public:
// add a ConvertFloat16ToFloat32 layer before each of the inputs
if (layer.GetDataType() == DataType::Float32)
{
- InsertConvertFp16ToFp32LayersBefore(graph, layer);
+ // NOTE: We need to call InsertConvertFp16ToFp32LayersBefore with expectCorrectInputType = false
+ // here, otherwise it will expect the inputs to be DataType::Float16
+ InsertConvertFp16ToFp32LayersBefore(graph, layer, false);
}
}
else if (layer.GetType() != LayerType::ConvertFp32ToFp16 && layer.GetType() != LayerType::ConvertFp16ToFp32)
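The new expectCorrectInputType flag exists because the two callers see the graph in different states: during the FP16 fallback in AssignBackends the graph already carries FP16 tensors, so only FP16-connected input slots should be wrapped, whereas this optimization plants its converters while every tensor is still FP32. A sketch of the two call sites, simplified from the code above rather than an additional change:

    // 1) FP16 fallback in AssignBackends(): the flag defaults to true, so a
    //    converter is only inserted in front of input slots that carry Float16.
    InsertConvertFp16ToFp32LayersBefore(graph, layer);

    // 2) ConvertFp32NetworkToFp16 optimization: the graph is still all-Float32
    //    at this point, so the Float16 check must be skipped.
    InsertConvertFp16ToFp32LayersBefore(graph, layer, false);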
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 4a7f007c2e..9901dcb7c1 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -265,8 +265,8 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
- result = layerSupportObject->IsDequantizeSupported(OverrideDataType(input, dataType),
- output,
+ result = layerSupportObject->IsDequantizeSupported(input,
+ OverrideDataType(output, dataType),
reason);
break;
}
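The reasoning behind this last hunk: a Dequantize input is always a quantized tensor, so overriding the input type with the probed data type produced a support query no backend could answer; the FP16/FP32 distinction only exists on the output. A commented sketch of the corrected query (OverrideDataType is the existing helper in WorkloadFactory.cpp; the concrete data types named in the comments are illustrative):

    // 'dataType' is the optional type being probed (Float32 during the fallback).
    const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); // quantized, e.g. QAsymm8
    const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();                 // Float16 or Float32

    // The quantized input passes through untouched; only the output type is
    // overridden, so the query now asks "can this backend dequantize to FP32?".
    result = layerSupportObject->IsDequantizeSupported(input,
                                                       OverrideDataType(output, dataType),
                                                       reason);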