path: root/src/armnn/optimizations
author      Mike Kelly <mike.kelly@arm.com>      2020-11-05 15:44:56 +0000
committer   Jim Flynn <jim.flynn@arm.com>        2020-11-08 22:50:50 +0000
commit      90231b8c9f680d323e4b93dcd0820a47925e6d24 (patch)
tree        c34ace59ad3dd4757650a7b5d8f21a996c31cc88 /src/armnn/optimizations
parent      82490eee66b733ef29b3697b21e10a4c8be95233 (diff)
download    armnn-90231b8c9f680d323e4b93dcd0820a47925e6d24.tar.gz
IVGCVSW-5315 Create FuseBatchNorm class
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: Id0625c58dbeea79874bf986b70d136ed9390bf83
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r--  src/armnn/optimizations/FuseBatchNorm.hpp  125
1 file changed, 89 insertions, 36 deletions
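The patch below templates FuseBatchNorm on the data type (Float32/Float16) and adds DepthwiseConvolution2d support; the underlying algebra is unchanged. Batch normalisation computes y = gamma * (x - mean) / sqrt(variance + epsilon) + beta, and because convolution is linear this can be folded once, ahead of inference, into the convolution's weights and bias. A minimal standalone sketch of that per-output-channel folding follows (illustrative names only, not the class in this patch; weights are assumed laid out with one contiguous block per output channel):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Fold batch-norm parameters into convolution weights and bias.
    // Assumes 'weights' holds elementsPerChannel contiguous values per output
    // channel (e.g. a flattened [O, I, H, W] tensor) and 'bias' has one entry
    // per output channel.
    void FoldBatchNorm(std::vector<float>&       weights,
                       std::vector<float>&       bias,
                       const std::vector<float>& gamma,
                       const std::vector<float>& beta,
                       const std::vector<float>& mean,
                       const std::vector<float>& variance,
                       float                     epsilon,
                       std::size_t               elementsPerChannel)
    {
        for (std::size_t c = 0; c < gamma.size(); ++c)
        {
            // One scale per output channel: gamma / sqrt(variance + epsilon).
            const float scale = gamma[c] / std::sqrt(variance[c] + epsilon);
            for (std::size_t i = 0; i < elementsPerChannel; ++i)
            {
                weights[c * elementsPerChannel + i] *= scale;   // fused weights
            }
            bias[c] = scale * (bias[c] - mean[c]) + beta[c];    // fused bias
        }
    }

The real optimization additionally has to address NCHW and NHWC weight layouts and the depthwise [M, I, H, W] layout, which is what the index arithmetic added in this change handles.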
diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp
index e8e8c5d77f..9d25379930 100644
--- a/src/armnn/optimizations/FuseBatchNorm.hpp
+++ b/src/armnn/optimizations/FuseBatchNorm.hpp
@@ -7,13 +7,15 @@
#include "Optimization.hpp"
#include <armnnUtils/DataLayoutIndexed.hpp>
+#include <ResolveType.hpp>
namespace armnn
{
namespace optimizations
{
-template <typename ConvLayer>
+template <typename ConvLayer, armnn::DataType ArmnnType,
+ typename T = armnn::ResolveType<ArmnnType>>
class FuseBatchNorm
{
public:
@@ -27,10 +29,12 @@ public:
Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
Layer& child = connection.GetOwningLayer();
- ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d);
+ bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d);
+
+ ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d || depthwise);
ARMNN_ASSERT(child.GetType() == LayerType::BatchNormalization);
- if (base.GetDataType() == DataType::Float32 && child.GetDataType() == DataType::Float32)
+ if (base.GetDataType() == ArmnnType && child.GetDataType() == ArmnnType)
{
OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
@@ -47,58 +51,92 @@ public:
ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));
auto convDescriptor = convLayer->GetParameters();
- ConstTensor weightsTensor(convLayer->m_Weight->GetTensorInfo(), convLayer->m_Weight->Map(true));
+ auto weightsInfo(convLayer->m_Weight->GetTensorInfo());
+ ConstTensor weightsTensor(weightsInfo, convLayer->m_Weight->Map(true));
armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
- auto weightsShape = convLayer->m_Weight->GetTensorInfo().GetShape();
- const unsigned int outputChannels = weightsShape[0];
- const unsigned int inputChannels = weightsShape[dataLayout.GetChannelsIndex()];
- const unsigned int weightsHeight = weightsShape[dataLayout.GetHeightIndex()];
- const unsigned int weightsWidth = weightsShape[dataLayout.GetWidthIndex()];
-
- const auto* weightsBuffer = static_cast<const float*>(weightsTensor.GetMemoryArea());
- const auto* betaBuffer = static_cast<const float*>(betaTensor.GetMemoryArea());
- const auto* gammaBuffer = static_cast<const float*>(gammaTensor.GetMemoryArea());
- const auto* meanBuffer = static_cast<const float*>(meanTensor.GetMemoryArea());
- const auto* varBuffer = static_cast<const float*>(varTensor.GetMemoryArea());
-
- std::vector<float> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
- std::vector<float> betaVector (betaBuffer, betaBuffer + betaTensor.GetNumElements());
- std::vector<float> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
- std::vector<float> meanVector (meanBuffer, meanBuffer + meanTensor.GetNumElements());
- std::vector<float> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
+ auto weightsShape = weightsInfo.GetShape();
+ const unsigned int depthMultiplier = depthwise ? weightsShape[0] : 1;
+ const unsigned int inputChannels = depthwise ? weightsShape[1] :
+ weightsShape[dataLayout.GetChannelsIndex()];
+ const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : weightsShape[0];
+ const unsigned int weightsHeight = depthwise ? weightsShape[2] :
+ weightsShape[dataLayout.GetHeightIndex()];
+ const unsigned int weightsWidth = depthwise ? weightsShape[3] :
+ weightsShape[dataLayout.GetWidthIndex()];
+
+ const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
+ const auto* betaBuffer = static_cast<const T*>(betaTensor.GetMemoryArea());
+ const auto* gammaBuffer = static_cast<const T*>(gammaTensor.GetMemoryArea());
+ const auto* meanBuffer = static_cast<const T*>(meanTensor.GetMemoryArea());
+ const auto* varBuffer = static_cast<const T*>(varTensor.GetMemoryArea());
+
+ std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
+ std::vector<T> betaVector (betaBuffer, betaBuffer + betaTensor.GetNumElements());
+ std::vector<T> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
+ std::vector<T> meanVector (meanBuffer, meanBuffer + meanTensor.GetNumElements());
+ std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
// fusedWeights = ( gamma * weights ) / ( std - epsilon);
- std::vector<float> fusedWeightsVector(weightsVector.size());
+ std::vector<T> fusedWeightsVector(weightsVector.size());
+ unsigned int depthwiseMultiplierIdx = 0;
- unsigned int i = 0;
- for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
+ for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
{
- auto mult = gammaVector[cOut] / sqrtf (varianceVector[cOut] + epsilon);
- for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
+ for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
+ T mult = gammaVector[cOut] / static_cast<T>(sqrtf (varianceVector[cOut] + epsilon));
+
+ if (depthwise)
+ {
+ cInput = cOut / depthMultiplier;
+ depthwiseMultiplierIdx = cOut % depthMultiplier;
+ }
+
for (unsigned int h = 0; h < weightsHeight; ++h)
{
for (unsigned int w = 0; w < weightsWidth; ++w)
{
- fusedWeightsVector[i] = mult * weightsVector[i];
- i++;
+ unsigned int weightsIdx = 0;
+
+ if (depthwise)
+ {
+ weightsIdx = depthwiseMultiplierIdx * weightsWidth * weightsHeight * inputChannels +
+ cInput * weightsWidth * weightsHeight +
+ h * weightsWidth +
+ w;
+ }
+ else if (convDescriptor.m_DataLayout == DataLayout::NHWC)
+ {
+ weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
+ h * weightsWidth * inputChannels +
+ w * inputChannels +
+ cInput;
+ }
+ else
+ {
+ weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
+ cInput * weightsWidth * weightsHeight +
+ h * weightsWidth +
+ w;
+ }
+ fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
}
}
}
}
- ConstTensor fusedWeightsTensor(convLayer->m_Weight->GetTensorInfo(), fusedWeightsVector);
+ ConstTensor fusedWeightsTensor(weightsInfo, fusedWeightsVector);
// fusedBias = (gamma * (bias - mean)) / (variance - epsilon) + beta;
- std::vector<float> fusedBiasVector(outputChannels);
+ std::vector<T> fusedBiasVector(outputChannels);
if (convDescriptor.m_BiasEnabled)
{
ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr,
"FuseBatchNorm: Bias data should not be null if bias is enabled.");
ConstTensor biasTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true));
- const auto* biasBuffer = static_cast<const float*>(biasTensor.GetMemoryArea());
- std::vector<float> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
+ const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
+ std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
@@ -109,7 +147,7 @@ public:
else
{
convDescriptor.m_BiasEnabled = true;
- std::vector<float> biasVector(outputChannels, 0);
+ std::vector<T> biasVector(outputChannels, T(0));
for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
@@ -117,7 +155,7 @@ public:
sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
}
}
- ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, DataType::Float32), fusedBiasVector);
+ ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, ArmnnType), fusedBiasVector);
// Insert the new convolution layer that has batch norm parameters fused into
const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + base.GetName();
@@ -143,10 +181,25 @@ protected:
~FuseBatchNorm() = default;
};
-using FuseBatchNormIntoConvolution2D =
+using FuseBatchNormIntoConvolution2DFloat32 =
OptimizeForExclusiveConnection<Convolution2dLayer,
BatchNormalizationLayer,
- FuseBatchNorm<Convolution2dLayer>>;
+ FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoConvolution2DFloat16 =
+ OptimizeForExclusiveConnection<Convolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float16>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat32 =
+ OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat16 =
+ OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float16>>;
} // namespace optimizations
} // namespace armnn
\ No newline at end of file
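For reference, optimizations declared this way are applied by handing them to the graph optimizer. A hedged sketch of how the four new aliases might be invoked (the actual registration lives outside this header, e.g. in Network.cpp and the unit tests; whether all four are enabled by default is not shown in this diff):

    // Hedged sketch: assumes ArmNN's internal Optimizer.hpp and this header are
    // included, and that 'graph' is an existing armnn::Graph being optimized.
    using namespace armnn;
    using namespace armnn::optimizations;

    Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32(),
                                             FuseBatchNormIntoConvolution2DFloat16(),
                                             FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
                                             FuseBatchNormIntoDepthwiseConvolution2DFloat16()));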