diff options
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/layers/DepthwiseConvolution2dLayer.cpp | 13 | ||||
-rw-r--r-- | src/armnn/optimizations/FuseBatchNorm.hpp | 25 | ||||
-rw-r--r-- | src/armnn/test/CreateWorkload.hpp | 4 | ||||
-rw-r--r-- | src/armnn/test/InferOutputTests.hpp | 2 | ||||
-rw-r--r-- | src/armnn/test/OptimizerTests.cpp | 4 | ||||
-rw-r--r-- | src/armnn/test/optimizations/FoldPadTests.cpp | 2 | ||||
-rw-r--r-- | src/armnn/test/optimizations/FuseActivationTests.cpp | 6 | ||||
-rw-r--r-- | src/armnn/test/optimizations/FuseBatchNormTests.cpp | 12 |
8 files changed, 28 insertions, 40 deletions
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp index b96c567504..ed52b39050 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp @@ -98,24 +98,21 @@ DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& i unsigned int inputBatchSize = inputShape[0]; unsigned int inputHeight = inputShape[dataLayoutIndex.GetHeightIndex()]; unsigned int inputWidth = inputShape[dataLayoutIndex.GetWidthIndex()]; - unsigned int inputChannels = inputShape[dataLayoutIndex.GetChannelsIndex()]; - // Expected filter shape: [ M, I, H, W ] - This shape does NOT depend on the data layout - // Namely: [ depth multiplier, input channels, filter height, filter width ] - // Output channels = input channels * depthMultiplier - unsigned int depthMultiplier = filterShape[0]; + // Expected filter shape: [ 1, H, W, O ] - This shape does NOT depend on the data layout + // Namely: [ 1, filter height, filter width, output channels ] - unsigned int filterHeight = filterShape[2]; + unsigned int filterHeight = filterShape[1]; unsigned int dilatedFilterHeight = filterHeight + (m_Param.m_DilationY - 1) * (filterHeight - 1); unsigned int readHeight = (inputHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - dilatedFilterHeight; unsigned int outputHeight = 1 + (readHeight / m_Param.m_StrideY); - unsigned int filterWidth = filterShape[3]; + unsigned int filterWidth = filterShape[2]; unsigned int dilatedFilterWidth = filterWidth + (m_Param.m_DilationX - 1) * (filterWidth - 1); unsigned int readWidth = (inputWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - dilatedFilterWidth; unsigned int outputWidth = 1 + (readWidth / m_Param.m_StrideX); - unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputChannels = filterShape[3]; unsigned int outputBatchSize = inputBatchSize; TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ? diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp index 3fb4b34d28..fe8238bf14 100644 --- a/src/armnn/optimizations/FuseBatchNorm.hpp +++ b/src/armnn/optimizations/FuseBatchNorm.hpp @@ -56,13 +56,12 @@ public: armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout); auto weightsShape = weightsInfo.GetShape(); - const unsigned int depthMultiplier = depthwise ? weightsShape[0] : 1; - const unsigned int inputChannels = depthwise ? weightsShape[1] : - weightsShape[dataLayout.GetChannelsIndex()]; - const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : weightsShape[0]; - const unsigned int weightsHeight = depthwise ? weightsShape[2] : + const unsigned int inputChannels = parentOut->GetTensorInfo().GetShape()[dataLayout.GetChannelsIndex()]; + const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1; + const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0]; + const unsigned int weightsHeight = depthwise ? weightsShape[1] : weightsShape[dataLayout.GetHeightIndex()]; - const unsigned int weightsWidth = depthwise ? weightsShape[3] : + const unsigned int weightsWidth = depthwise ? weightsShape[2] : weightsShape[dataLayout.GetWidthIndex()]; const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea()); @@ -79,7 +78,6 @@ public: // fusedWeights = ( gamma * weights ) / ( std - epsilon); std::vector<T> fusedWeightsVector(weightsVector.size()); - unsigned int depthwiseMultiplierIdx = 0; for (unsigned int cInput = 0; cInput < inputChannels; ++cInput) { @@ -87,12 +85,6 @@ public: { T mult = gammaVector[cOut] / static_cast<T>(sqrtf (varianceVector[cOut] + epsilon)); - if (depthwise) - { - cInput = cOut / depthMultiplier; - depthwiseMultiplierIdx = cOut % depthMultiplier; - } - for (unsigned int h = 0; h < weightsHeight; ++h) { for (unsigned int w = 0; w < weightsWidth; ++w) @@ -101,10 +93,9 @@ public: if (depthwise) { - weightsIdx = depthwiseMultiplierIdx * weightsWidth * weightsHeight * inputChannels + - cInput * weightsWidth * weightsHeight + - h * weightsWidth + - w; + cInput = cOut / depthMultiplier; + weightsIdx = w * outputChannels + cOut + + h * weightsWidth * outputChannels; } else if (convDescriptor.m_DataLayout == DataLayout::NHWC) { diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index 581c621a16..b07e3b80a5 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -1149,7 +1149,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer"); - layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 2, 4, 4}, DataType)); // [ M, I, H, W ] + layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType)); // [ 1, H, W, I*M ] layer->m_Weight->Allocate(); // Creates extra layers. @@ -1181,7 +1181,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio CHECK(queueDescriptor.m_Inputs.size() == 1); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 2, 4, 4}, DataType))); + CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType))); // Returns so we can do extra, backend-specific tests. return workload; diff --git a/src/armnn/test/InferOutputTests.hpp b/src/armnn/test/InferOutputTests.hpp index b8276de80c..6e2676ec8e 100644 --- a/src/armnn/test/InferOutputTests.hpp +++ b/src/armnn/test/InferOutputTests.hpp @@ -518,7 +518,7 @@ void DepthwiseConvolution2dInferOutputShapeTest() armnn::TensorShape inputShape(4, inputSize.data()); shapes.push_back(inputShape); - const std::vector<unsigned int> filterSize = { 1, 2, 3, 3}; + const std::vector<unsigned int> filterSize = { 1, 3, 3, 2 }; armnn::TensorShape filterShape(4, filterSize.data()); shapes.push_back(filterShape); diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index e68546c9dd..d4e2d499d5 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -340,7 +340,7 @@ TEST_CASE("DepthwiseConv2dValidateTensorShapesFromInputs") { Graph graph; const unsigned int inputShape[] = { 1, 2, 3, 3 }; - const unsigned int weightsShape[] = { 1, 2, 3, 3 }; + const unsigned int weightsShape[] = { 1, 3, 3, 2 }; const unsigned int outputShape[] = { 1, 2, 1, 1 }; CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape); @@ -351,7 +351,7 @@ TEST_CASE("DepthwiseConv2dValidateTensorShapesFromInputsNhwc") { Graph graph; const unsigned int inputShape[] = { 1, 3, 3, 2 }; - const unsigned int weightsShape[] = { 1, 2, 3, 3 }; + const unsigned int weightsShape[] = { 1, 3, 3, 2 }; const unsigned int outputShape[] = { 1, 1, 1, 2 }; CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape, DataLayout::NHWC); diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp index 7b4ac4170f..11f09e80e0 100644 --- a/src/armnn/test/optimizations/FoldPadTests.cpp +++ b/src/armnn/test/optimizations/FoldPadTests.cpp @@ -687,7 +687,7 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConv2dLayer_ExecuteInferenceWithAndWithoutOp // avoided. The output tensors of each should match. const unsigned int inputShape[] = {1, 4, 4, 3}; // NHWCin const unsigned int paddedShape[] = {1, 6, 6, 3}; - const unsigned int weightsShape[] = {4, 3, 2, 2}; // MCinHW + const unsigned int weightsShape[] = {1, 2, 2, 12}; // 1HWCout const unsigned int outputShape[] = {1, 5, 5, 12}; // NHWCout std::vector<float> inputData({2.0f, 2.0f, 6.0f, 6.0f, diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp index 9e332136f6..35b5bbc2da 100644 --- a/src/armnn/test/optimizations/FuseActivationTests.cpp +++ b/src/armnn/test/optimizations/FuseActivationTests.cpp @@ -81,9 +81,9 @@ public: using LayerType = DepthwiseConvolution2dLayer; static const bool isElementWise = false; - static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin - static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // NHWCout - static TensorShape GetWeightsShape() { return TensorShape( {4, 3, 2, 2}); } // MCinHW + static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin] + static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout] + static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout] constexpr static const unsigned int inputSize = 48; //batchIn * heightIn * widthIn * channelIn; constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut; diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp index 671f565054..20d2940b81 100644 --- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp +++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp @@ -90,12 +90,12 @@ INetworkPtr CreatNetwork(bool depthwise, bool preventFusing) if (depthwise) { - //M Cin H W - weightsDimensionSizes[0] = 4; - weightsDimensionSizes[1] = 3; + // [1, H, W, Cout] + weightsDimensionSizes[0] = 1; + weightsDimensionSizes[1] = 2; weightsDimensionSizes[2] = 2; - weightsDimensionSizes[3] = 2; - outputDimensionSizes[3] = weightsDimensionSizes[0] * weightsDimensionSizes[1]; + weightsDimensionSizes[3] = 12; + outputDimensionSizes[3] = weightsDimensionSizes[3]; } const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout @@ -295,7 +295,7 @@ TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat32Test") TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat16Test") { - FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.1f,armnn::Compute::CpuRef); + FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.2f,armnn::Compute::CpuRef); } #endif |