// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #include "CaffeParser.hpp" #include "RecordByRecordCaffeParser.hpp" #include "armnn/Descriptors.hpp" #include "armnn/INetwork.hpp" #include "armnn/Utils.hpp" #include "armnn/Exceptions.hpp" #include "GraphTopologicalSort.hpp" #include "VerificationHelpers.hpp" #include #include #include // Caffe #include "caffe/proto/caffe.pb.h" // ProtoBuf #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated /// code from caffe.pb.h. This gives us a caffe::NetParameter which is an in-memory version of the file. /// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.). /// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top. /// The bottoms of a layer refer to the tops of other layers, not their names. /// The names of layers seem to be arbitrary (you could rename a layer and the network wouldn't /// need any other changes). /// /// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an /// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place. /// This isn't relevant to the parser and so we preprocess these layers to convert them to regular layers, to result /// in a consistent graph structure. namespace armnnCaffeParser { using namespace armnn; using namespace caffe; using namespace std; using namespace google::protobuf::io; namespace { const float* GetArrayPtrFromBlob(const LayerParameter& layerParam, unsigned int blobIndex) { auto nBlobs = layerParam.blobs_size(); if (blobIndex >= boost::numeric_cast(nBlobs)) { throw ParseException( boost::str( boost::format( "Expected data blob at index %1% in layer %2% not found. nBlobs=%2%. %4%") % blobIndex % layerParam.name() % nBlobs % CHECK_LOCATION().AsString())); } const BlobProto& blob = layerParam.blobs(boost::numeric_cast(blobIndex)); const float* arrayPtr = blob.data().data(); return arrayPtr; } void GetDataFromBlob(const LayerParameter& layerParam, vector& outData, unsigned int blobIndex) { auto nBlobs = layerParam.blobs_size(); if (blobIndex >= boost::numeric_cast(nBlobs)) { throw ParseException( boost::str( boost::format( "Expected data blob at index %1% in layer %2% not found. %3%") % blobIndex % layerParam.name() % CHECK_LOCATION().AsString())); } const BlobProto& blob = layerParam.blobs(boost::numeric_cast(blobIndex)); size_t blobSize = boost::numeric_cast(blob.data_size()); if (blobSize != outData.size()) { throw ParseException( boost::str( boost::format( "Data blob at index %1% in layer %2% has an unexpected size. " "Expected %3% elements but got %4% elements. %5%") % blobIndex % layerParam.name() % outData.size() % blobSize % CHECK_LOCATION().AsString())); } int outSizeInt = boost::numeric_cast(outData.size()); for (int i = 0; i < outSizeInt; ++i) { outData[static_cast(i)] = blob.data(i); } } template size_t SizeOfVectorData(const vector& vec) { return vec.size() * sizeof(T); } void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter, unsigned int numInputs, unsigned int numOutputs) { int numInputsActual = layerParameter.bottom_size(); if (numInputs != boost::numeric_cast(numInputsActual)) { throw ParseException( boost::str( boost::format("Invalid number of inputs requested %1% for layer %2% " "while only %3% present. %4%") % numInputs % layerParameter.name() % numInputsActual % CHECK_LOCATION().AsString())); } int numOutputsActual = layerParameter.top_size(); if (numOutputs != boost::numeric_cast(numOutputsActual)) { throw ParseException( boost::str( boost::format("Invalid number of outputs requested %1% for layer %2% " "while only %3% present. %4%") % numOutputs % layerParameter.name() % numOutputsActual % CHECK_LOCATION().AsString())); } } template ValueType GetOptionalWithFallback(const ParamType& param, ExtractOptional extractOptional, ExtractFallback extractFallback, ValueType defaultValue) { auto optValue = extractOptional(param, defaultValue); if (optValue.first) { return optValue.second; } auto fallbackValue = extractFallback(param, defaultValue); return fallbackValue.second; } #define GET_OPTIONAL_WITH_VECTOR_FALLBACK(PARAM, \ PARAM_TYPE, \ OPTIONAL_VALUE, \ FALLBACK_VECTOR, \ VALUE_TYPE, \ DEFAULT_VALUE) \ GetOptionalWithFallback( \ PARAM, \ [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \ { \ if (param.has_##OPTIONAL_VALUE ()) \ { \ return std::make_pair(true, param.OPTIONAL_VALUE ()); \ } \ else \ { \ return std::make_pair(false, defaultValue); \ } \ }, \ [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \ { \ if (param.FALLBACK_VECTOR##_size() > 0) \ { \ return std::make_pair(true, (param.FALLBACK_VECTOR ()).Get(0)); \ } \ else \ { \ return std::make_pair(false, defaultValue); \ } \ }, \ DEFAULT_VALUE) #define GET_OPTIONAL_WITH_FALLBACK(PARAM, \ PARAM_TYPE, \ OPTIONAL_VALUE, \ FALLBACK_VALUE, \ VALUE_TYPE, \ DEFAULT_VALUE) \ GetOptionalWithFallback( \ PARAM, \ [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \ { \ if (param.has_##OPTIONAL_VALUE ()) \ { \ return std::make_pair(true, param.OPTIONAL_VALUE ()); \ } \ else \ { \ return std::make_pair(false, defaultValue); \ } \ }, \ [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \ { \ if (param.has_##FALLBACK_VALUE ()) \ { \ return std::make_pair(true, param.FALLBACK_VALUE ()); \ } \ else \ { \ return std::make_pair(false, defaultValue); \ } \ }, \ DEFAULT_VALUE) } // namespace const std::map CaffeParserBase::ms_CaffeLayerNameToParsingFunctions = { { "Input", &CaffeParserBase::ParseInputLayer }, { "Convolution", &CaffeParserBase::ParseConvLayer }, { "Pooling", &CaffeParserBase::ParsePoolingLayer }, { "ReLU", &CaffeParserBase::ParseReluLayer }, { "LRN", &CaffeParserBase::ParseLRNLayer }, { "InnerProduct", &CaffeParserBase::ParseInnerProductLayer }, { "Softmax", &CaffeParserBase::ParseSoftmaxLayer }, { "Eltwise", &CaffeParserBase::ParseEltwiseLayer }, { "Concat", &CaffeParserBase::ParseConcatLayer }, { "BatchNorm", &CaffeParserBase::ParseBatchNormLayer }, { "Scale", &CaffeParserBase::ParseScaleLayer }, { "Split", &CaffeParserBase::ParseSplitLayer }, { "Dropout", &CaffeParserBase::ParseDropoutLayer}, }; ICaffeParser* ICaffeParser::CreateRaw() { return new RecordByRecordCaffeParser(); } ICaffeParserPtr ICaffeParser::Create() { return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy); } void ICaffeParser::Destroy(ICaffeParser* parser) { delete parser; } CaffeParserBase::CaffeParserBase() : m_Network(nullptr, nullptr) { } CaffeParser::CaffeParser() : CaffeParserBase() { } BindingPointInfo CaffeParserBase::GetNetworkInputBindingInfo(const std::string& name) const { return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo); } BindingPointInfo CaffeParserBase::GetNetworkOutputBindingInfo(const std::string& name) const { return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo); } std::pair CaffeParserBase::GetBindingInfo(const std::string& layerName, const char* bindingPointDesc, const std::unordered_map& nameToBindingInfo) { auto it = nameToBindingInfo.find(layerName); if (it == nameToBindingInfo.end()) { throw InvalidArgumentException( boost::str( boost::format( "Unknown binding %1% for layer '%2%'. %3%") % bindingPointDesc % layerName % CHECK_LOCATION().AsString())); } return it->second; } TensorInfo CaffeParserBase::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const { std::vector shape; for (int j = 0; j < blobShape.dim_size(); ++j) { shape.push_back(static_cast(blobShape.dim(j))); } return TensorInfo(boost::numeric_cast(shape.size()), shape.data(), DataType::Float32); } BlobShape TensorDescToBlobShape(const TensorInfo& desc) { BlobShape ret; for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i) { ret.add_dim(i); ret.set_dim(boost::numeric_cast(i), desc.GetShape()[i]); } return ret; } // Note: can move to CaffeParser when/if we optimise the text/string format // to load on a layer by layer basis vector CaffeParserBase::GetInputs(const LayerParameter& layerParam) { std::vector ret; ret.reserve(boost::numeric_cast(layerParam.bottom_size())); for (int j = 0; j < layerParam.bottom_size(); ++j) { std::string inputName = layerParam.bottom(j); auto inputIt = m_CaffeLayersByTopName.find(inputName); if (inputIt == m_CaffeLayersByTopName.end()) { throw ParseException( boost::str( boost::format( "Can't find Caffe layer with top called '%1%', " "which is listed as an input of '%2%'. %3%") % inputName % layerParam.name() % CHECK_LOCATION().AsString())); } ret.push_back(inputIt->second); } return ret; } void CaffeParserBase::ParseInputLayer(const LayerParameter& layerParam) { ARMNN_ASSERT(layerParam.type() == "Input"); ValidateNumInputsOutputs(layerParam, 0, 1); const InputParameter& param = layerParam.input_param(); const armnn::LayerBindingId inputId = boost::numeric_cast( m_NetworkInputsBindingInfo.size()); armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str()); // Decides the tensor info for this input. This can be specified in the Caffe network but can also // be overriden by user input (m_inputShapes). armnn::TensorInfo inputTensorInfo; const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ? ¶m.shape(0) : nullptr; if (originalShape) { inputTensorInfo = BlobShapeToTensorInfo(*originalShape); } auto overrideIt = m_InputShapes.find(layerParam.name()); if (overrideIt != m_InputShapes.end()) { const TensorShape& overrideShape = overrideIt->second; if (originalShape && ( originalShape->dim(1) != overrideShape[1] || originalShape->dim(2) != overrideShape[2] || originalShape->dim(3) != overrideShape[3])) { throw ParseException( boost::str( boost::format( "Parsed input shape for '%1%' is incompatible with the override provided. %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } inputTensorInfo.SetShape(overrideShape); } else if (!originalShape) { throw ParseException( boost::str( boost::format( "No input descriptor given for '%1%' and no input shape found in caffe model. %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } TrackInputBinding(inputLayer, inputId, inputTensorInfo); inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0)); } void CaffeParserBase::AddConvLayerWithSplits(const caffe::LayerParameter& layerParam, const armnn::Convolution2dDescriptor& desc, unsigned int kernelW, unsigned int kernelH) { ARMNN_ASSERT(layerParam.type() == "Convolution"); ValidateNumInputsOutputs(layerParam, 1, 1); ConvolutionParameter convParam = layerParam.convolution_param(); BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo()); const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1; // asusme these were already verified by the caller ParseConvLayer() function ARMNN_ASSERT(numGroups < inputShape.dim(1)); ARMNN_ASSERT(numGroups > 1); // Handle grouping armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)); vector convLayerNames(numGroups); vector convLayers(numGroups); convLayerNames[0] = layerParam.name(); // This convolution is to be applied to chunks of the input data so add a splitter layer // Redirect the convolution input to the splitter unsigned int splitterDimSizes[4] = {static_cast(inputShape.dim(0)), static_cast(inputShape.dim(1)), static_cast(inputShape.dim(2)), static_cast(inputShape.dim(3))}; // Split dimension 1 of the splitter output shape and conv input shapes // according to the number of groups splitterDimSizes[1] /= numGroups; inputShape.set_dim(1, splitterDimSizes[1]); // This is used to describe how the input is to be split ViewsDescriptor splitterDesc(numGroups); // Create an output node for each group, giving each a unique name for (unsigned int g = 0; g < numGroups; ++g) { // Work out the names of the splitter layers child convolutions stringstream ss; ss << layerParam.name() << "_" << g; convLayerNames[g] = ss.str(); splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g); // Set the size of the views. for (unsigned int dimIdx=0; dimIdx < 4; dimIdx++) { splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]); } } const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0); armnn::IConnectableLayer* splitterLayer = m_Network->AddSplitterLayer(splitterDesc, splitterLayerName.c_str()); inputConnection.Connect(splitterLayer->GetInputSlot(0)); for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++) { splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape)); } unsigned int numFilters = convParam.num_output(); // Populates convolution output tensor descriptor dimensions. BlobShape outputShape; outputShape.add_dim(0); outputShape.set_dim(0, inputShape.dim(0)); outputShape.add_dim(1); // Ensures that dimension 1 of the convolution output is split according to the number of groups. outputShape.set_dim(1, numFilters / numGroups); outputShape.add_dim(2); outputShape.set_dim( 2, (static_cast( static_cast(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) / static_cast(desc.m_StrideY)) + 1)); outputShape.add_dim(3); outputShape.set_dim( 3, (static_cast( static_cast(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) / static_cast(desc.m_StrideX)) + 1)); // Load the weight data for ALL groups vector weightData(boost::numeric_cast(numGroups * inputShape.dim(1) * // number of input channels outputShape.dim(1) * // number of output channels kernelH * kernelW)); GetDataFromBlob(layerParam, weightData, 0); const unsigned int weightDimSizes[4] = { static_cast(outputShape.dim(1)), static_cast(inputShape.dim(1)), kernelH, kernelW}; TensorInfo biasInfo; vector biasData; if (desc.m_BiasEnabled) { biasData.resize(boost::numeric_cast(numGroups * outputShape.dim(1)), 1.f); GetDataFromBlob(layerParam, biasData, 1); const unsigned int biasDimSizes[1] = {static_cast(outputShape.dim(1))}; biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32); } const unsigned int numWeightsPerGroup = boost::numeric_cast(weightData.size()) / numGroups; const unsigned int numBiasesPerGroup = boost::numeric_cast(biasData.size()) / numGroups; for (unsigned int g = 0; g < numGroups; ++g) { // Sets the slot index, group 0 should be connected to the 0th output of the splitter // group 1 should be connected to the 1st output of the splitter. // Pulls out the weights for this group from that loaded from the model file earlier. ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data() + numWeightsPerGroup * g); IConnectableLayer* convLayer = nullptr; Optional optionalBiases; if (desc.m_BiasEnabled) { // Pulls out the biases for this group from that loaded from the model file earlier. ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g); optionalBiases = Optional(biases); } convLayer = m_Network->AddConvolution2dLayer(desc, weights, optionalBiases, convLayerNames[g].c_str()); convLayers[g] = convLayer; // If we have more than one group then the input to the nth convolution the splitter layer's nth output, // otherwise it's the regular input to this layer. armnn::IOutputSlot& splitterInputConnection = splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection; splitterInputConnection.Connect(convLayer->GetInputSlot(0)); convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape)); } // If the convolution was performed in chunks, add a layer to concatenate the results // The merge input shape matches that of the convolution output unsigned int concatDimSizes[4] = {static_cast(outputShape.dim(0)), static_cast(outputShape.dim(1)), static_cast(outputShape.dim(2)), static_cast(outputShape.dim(3))}; // This is used to describe how the input is to be concatenated OriginsDescriptor concatDesc(numGroups); // Now create an input node for each group, using the name from // the output of the corresponding convolution for (unsigned int g = 0; g < numGroups; ++g) { concatDesc.SetViewOriginCoord(g, 1, concatDimSizes[1] * g); } // Make sure the output from the concat is the correct size to hold the data for all groups concatDimSizes[1] *= numGroups; outputShape.set_dim(1, concatDimSizes[1]); // Finally add the concat layer IConnectableLayer* concatLayer = m_Network->AddConcatLayer(concatDesc, layerParam.name().c_str()); if (!concatLayer) { throw ParseException( boost::str( boost::format( "Failed to create final concat layer for Split+Convolution+Concat. " "Layer=%1% #groups=%2% #filters=%3% %4%") % layerParam.name() % numGroups % numFilters % CHECK_LOCATION().AsString())); } for (unsigned int g = 0; g < numGroups; ++g) { convLayers[g]->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(g)); } concatLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, concatDimSizes, DataType::Float32)); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatLayer->GetOutputSlot(0)); } void CaffeParserBase::AddConvLayerWithDepthwiseConv(const caffe::LayerParameter& layerParam, const armnn::Convolution2dDescriptor& convDesc, unsigned int kernelW, unsigned int kernelH) { ARMNN_ASSERT(layerParam.type() == "Convolution"); ValidateNumInputsOutputs(layerParam, 1, 1); ConvolutionParameter convParam = layerParam.convolution_param(); BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo()); DepthwiseConvolution2dDescriptor desc; desc.m_PadLeft = convDesc.m_PadLeft; desc.m_PadRight = convDesc.m_PadRight; desc.m_PadTop = convDesc.m_PadTop; desc.m_PadBottom = convDesc.m_PadBottom; desc.m_StrideX = convDesc.m_StrideX; desc.m_StrideY = convDesc.m_StrideY; desc.m_BiasEnabled = convDesc.m_BiasEnabled; unsigned int numFilters = convParam.num_output(); BlobShape outputShape; outputShape.add_dim(0); outputShape.set_dim(0, inputShape.dim(0)); outputShape.add_dim(1); outputShape.set_dim(1, numFilters); outputShape.add_dim(2); outputShape.set_dim( 2, (static_cast( static_cast(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) / static_cast(desc.m_StrideY)) + 1)); outputShape.add_dim(3); outputShape.set_dim( 3, (static_cast( static_cast(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) / static_cast(desc.m_StrideX)) + 1)); // Load the weight data size_t allWeightsSize = boost::numeric_cast(inputShape.dim(1) * kernelH * kernelW); vector weightData(allWeightsSize); GetDataFromBlob(layerParam, weightData, 0); // depth multiplier will be 1 for the depthwise convolution const unsigned int weightDimSizes[4] = { static_cast(1), // depth multiplier static_cast(inputShape.dim(1)), // #channels kernelH, kernelW}; armnn::IConnectableLayer* returnLayer = nullptr; ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data()); Optional optionalBiases; vector biasData; if (desc.m_BiasEnabled) { TensorInfo biasInfo; biasData.resize(boost::numeric_cast(outputShape.dim(1)), 1.f); GetDataFromBlob(layerParam, biasData, 1); const unsigned int biasDimSizes[1] = {static_cast(outputShape.dim(1))}; biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32); ConstTensor biases(biasInfo, biasData.data()); optionalBiases = Optional(biases); } returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, optionalBiases, layerParam.name().c_str()); if (!returnLayer) { throw ParseException( boost::str( boost::format( "Failed to create depthwise convolution layer. " "Layer=%1% #filters=%2% %3%") % layerParam.name() % numFilters % CHECK_LOCATION().AsString())); } armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)); inputConnection.Connect(returnLayer->GetInputSlot(0)); returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape)); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseConvLayer(const LayerParameter& layerParam) { // Ignored Caffe Parameters // * Dilation Size // * Weight Filler // * Bias Filler // * Engine // * Force nd_im2col // * Axis // Not Available ArmNN Interface Parameters // * Rounding policy; ARMNN_ASSERT(layerParam.type() == "Convolution"); ValidateNumInputsOutputs(layerParam, 1, 1); ConvolutionParameter convParam = layerParam.convolution_param(); BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo()); const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1; unsigned int numFilters = convParam.num_output(); const auto notFound = std::numeric_limits::max(); unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, kernel_h, kernel_size, unsigned int, notFound); unsigned int kernelW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, kernel_w, kernel_size, unsigned int, notFound); unsigned int strideH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, stride_h, stride, unsigned int, 1u); unsigned int strideW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, stride_w, stride, unsigned int, 1u); unsigned int padH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, pad_h, pad, unsigned int, 0u); unsigned int padW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter, pad_w, pad, unsigned int, 0u); Convolution2dDescriptor convolution2dDescriptor; convolution2dDescriptor.m_PadLeft = padW; convolution2dDescriptor.m_PadRight = padW; convolution2dDescriptor.m_PadTop = padH; convolution2dDescriptor.m_PadBottom = padH; convolution2dDescriptor.m_StrideX = strideW; convolution2dDescriptor.m_StrideY = strideH; convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true; if (numGroups > numFilters) { throw ParseException( boost::str( boost::format( "Error parsing Convolution: %1%. " "The 'group'=%2% parameter cannot be larger than the " "number of filters supplied ='%3%'. %4%") % layerParam.name() % numGroups % numFilters % CHECK_LOCATION().AsString())); } if (inputShape.dim_size() != 4) { throw ParseException( boost::str( boost::format( "Convolution input shape is expected to have 4 dimensions. " "%1%'s input has only %2%. %3%") % layerParam.name() % inputShape.dim_size() % CHECK_LOCATION().AsString())); } if (numGroups > 1) { if (numGroups > inputShape.dim(1)) { throw ParseException( boost::str( boost::format( "Error parsing Convolution: %1%. " "The 'group'=%2% parameter cannot be larger than the " "channel of the input shape=%3% (in NCHW format). %4%") % layerParam.name() % numGroups % inputShape.dim(1) % CHECK_LOCATION().AsString())); } else if (numGroups == inputShape.dim(1)) { // we use a depthwise convolution here, because the number of groups equals to the // input channels AddConvLayerWithDepthwiseConv(layerParam, convolution2dDescriptor, kernelW, kernelH); return; } else { // we split the input by channels into channels/groups separate convolutions // and concatenate the results afterwards AddConvLayerWithSplits(layerParam, convolution2dDescriptor, kernelW, kernelH); return; } } // NOTE: at this point we only need to handle #group=1 case, all other cases should be // handled by the AddConvLayer* helpers // Populate convolution output tensor descriptor dimensions BlobShape outputShape; outputShape.add_dim(0); outputShape.set_dim(0, inputShape.dim(0)); outputShape.add_dim(1); outputShape.set_dim(1, numFilters); outputShape.add_dim(2); outputShape.set_dim( 2, (static_cast( static_cast(inputShape.dim(2) + 2 * padH - kernelH) / static_cast(strideH)) + 1)); outputShape.add_dim(3); outputShape.set_dim( 3, (static_cast( static_cast(inputShape.dim(3) + 2 * padW - kernelW) / static_cast(strideW)) + 1)); // Load the weight data for ALL groups vector weightData(boost::numeric_cast(inputShape.dim(1) * outputShape.dim(1) * kernelH * kernelW)); GetDataFromBlob(layerParam, weightData, 0); const unsigned int weightDimSizes[4] = { static_cast(outputShape.dim(1)), // output channels static_cast(inputShape.dim(1)), // input channels kernelH, kernelW}; armnn::IConnectableLayer* returnLayer = nullptr; // Pull out the weights for this group from that loaded from the model file earlier ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data()); Optional optionalBiases; vector biasData; if (convolution2dDescriptor.m_BiasEnabled) { TensorInfo biasInfo; biasData.resize(boost::numeric_cast(outputShape.dim(1)), 1.f); GetDataFromBlob(layerParam, biasData, 1); const unsigned int biasDimSizes[1] = {static_cast(outputShape.dim(1))}; biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32); // Pull out the biases for this group from that loaded from the model file earlier ConstTensor biases(biasInfo, biasData.data()); optionalBiases = Optional(biases); } returnLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, optionalBiases, layerParam.name().c_str()); armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)); inputConnection.Connect(returnLayer->GetInputSlot(0)); returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape)); if (!returnLayer) { throw ParseException( boost::str( boost::format( "Failed to create Convolution layer. " "Layer=%1% #groups=%2% #filters=%3% %4%") % layerParam.name() % numGroups % numFilters % CHECK_LOCATION().AsString())); } SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0)); } void CaffeParserBase::ParsePoolingLayer(const LayerParameter& layerParam) { // Ignored Caffe Parameters // Stochastic Pooling // Engine ValidateNumInputsOutputs(layerParam, 1, 1); PoolingParameter param = layerParam.pooling_param(); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); const auto notFound = std::numeric_limits::max(); unsigned int kernel_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, kernel_h, kernel_size, unsigned int, notFound); unsigned int kernel_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, kernel_w, kernel_size, unsigned int, notFound); if ((kernel_h == notFound || kernel_w == notFound) && param.has_global_pooling()) { kernel_h = inputInfo.GetShape()[2]; kernel_w = inputInfo.GetShape()[3]; } unsigned int stride_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, stride_h, stride, unsigned int, notFound); unsigned int stride_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, stride_h, stride, unsigned int, notFound); if ((stride_h == notFound || stride_w == notFound) && param.has_global_pooling()) { stride_h = 1; stride_w = 1; } unsigned int pad_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, pad_h, pad, unsigned int, 0u); unsigned int pad_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter, pad_w, pad, unsigned int, 0u); // Populate Weight and Bias Filter Descriptor Pooling2dDescriptor pooling2dDescriptor; if (param.has_pool()) { PoolingParameter_PoolMethod p = param.pool(); switch (p) { case PoolingParameter_PoolMethod_MAX: { pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max; break; } case PoolingParameter_PoolMethod_AVE: { pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average; break; } case PoolingParameter_PoolMethod_STOCHASTIC: { throw ParseException( boost::str( boost::format( "Pooling Layer: Stochastic Pooling Not Supported. Layer=%1% %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } default: { throw ParseException( boost::str( boost::format( "Pooling Layer: unknown pooling method: %1% for layer: %2% %3%") % p % layerParam.name() % CHECK_LOCATION().AsString())); } } } else { throw ParseException( boost::str( boost::format( "No Pooling Method Defined for %1% %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } pooling2dDescriptor.m_PadLeft = pad_w; pooling2dDescriptor.m_PadRight = pad_w; pooling2dDescriptor.m_PadTop = pad_h; pooling2dDescriptor.m_PadBottom = pad_h; pooling2dDescriptor.m_StrideX = stride_w; pooling2dDescriptor.m_StrideY = stride_h; pooling2dDescriptor.m_PoolWidth = kernel_w; pooling2dDescriptor.m_PoolHeight = kernel_h; pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling; pooling2dDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue; armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor, layerParam.name().c_str()); TensorInfo outputInfo( { inputInfo.GetShape()[0], inputInfo.GetShape()[1], static_cast(ceil( static_cast(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) / boost::numeric_cast(stride_h))) + 1, static_cast(ceil( static_cast(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) / boost::numeric_cast(stride_w))) + 1 }, DataType::Float32); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0)); poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseReluLayer(const LayerParameter& layerParam) { ValidateNumInputsOutputs(layerParam, 1, 1); const string& name = layerParam.name(); const ReLUParameter& param = layerParam.relu_param(); ActivationDescriptor activationDescriptor; const float negativeSlope = param.negative_slope(); if (negativeSlope == 0.0f) { activationDescriptor.m_Function = ActivationFunction::ReLu; } else { activationDescriptor.m_Function = ActivationFunction::LeakyReLu; activationDescriptor.m_A = negativeSlope; } const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str()); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0)); activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseLRNLayer(const LayerParameter& layerParam) { ValidateNumInputsOutputs(layerParam, 1, 1); LRNParameter param = layerParam.lrn_param(); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); // Ignored BATCH NORMALIZATION Caffe Parameters. // Ignored MVN Caffe Parameters. // Ignored LRN Caffe Parameters. // Engine NormalizationDescriptor normalizationDescriptor; if (param.has_norm_region()) { LRNParameter_NormRegion n = param.norm_region(); switch (n) { case LRNParameter_NormRegion_ACROSS_CHANNELS: { normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across; break; } case LRNParameter_NormRegion_WITHIN_CHANNEL: { normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within; break; } default: { throw ParseException( boost::str( boost::format( "Unknown region %1% for LRN layer %2% %3%") % n % layerParam.name() % CHECK_LOCATION().AsString())); } } } else { // Caffe defaults to normalization across channels. normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across; } normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness; if (param.has_local_size()) { normalizationDescriptor.m_NormSize = param.local_size(); } else { throw ParseException( boost::str( boost::format( "local_size not defined for LRN layer %1% %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } if (param.has_alpha()) { normalizationDescriptor.m_Alpha = param.alpha(); normalizationDescriptor.m_Alpha /= boost::numeric_cast(param.local_size()); } else { throw ParseException( boost::str( boost::format( "Alpha not defined for LRN layer %1% %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } if (param.has_beta()) { normalizationDescriptor.m_Beta = param.beta(); } else { throw ParseException( boost::str( boost::format( "Beta not defined for LRN layer %1% %2%") % layerParam.name() % CHECK_LOCATION().AsString())); } if (param.has_k()) { normalizationDescriptor.m_K = param.k(); } else { normalizationDescriptor.m_K = 1; } IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor, layerParam.name().c_str()); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0)); normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseInnerProductLayer(const LayerParameter& layerParam) { InnerProductParameter param = layerParam.inner_product_param(); ValidateNumInputsOutputs(layerParam, 1, 1); unsigned int outputSize = param.num_output(); // Ignored Caffe Parameters: // Weight Filler // Bias Filler // Engine // Axis FullyConnectedDescriptor tensorFullyConnectedDescriptor; if (param.has_transpose()) { // If true, assumes transposed weights. tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose(); } else { // Caffe defaults to transposed. tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true; } const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); TensorInfo weightInfo; TensorInfo biasInfo; // Allows implicit flattening of extra dimensions. unsigned int inputSize = inputInfo.GetShape()[1]; for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i) { inputSize *= inputInfo.GetShape()[i]; } const float* weightDataPtr = GetArrayPtrFromBlob(layerParam, 0); const unsigned int swTD[2] = { outputSize, inputSize }; ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightDataPtr); tensorFullyConnectedDescriptor.m_BiasEnabled = true; // Todo: check whether bias enabled. armnn::IConnectableLayer* fullyConnectedLayer = nullptr; if (tensorFullyConnectedDescriptor.m_BiasEnabled) { // BIAS VALUE const float* biasDataPtr = GetArrayPtrFromBlob(layerParam, 1); const unsigned int sbTD[1] = { outputSize }; ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasDataPtr); fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, Optional(biases), layerParam.name().c_str()); } else { fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, EmptyOptional(), layerParam.name().c_str()); } TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0)); fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseSoftmaxLayer(const LayerParameter& layerParam) { ValidateNumInputsOutputs(layerParam, 1, 1); SoftmaxParameter param = layerParam.softmax_param(); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); // Ignored Caffe Parameters: // axis // Engine armnn::SoftmaxDescriptor softmaxDescriptor; softmaxDescriptor.m_Axis = -1; armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer( softmaxDescriptor, layerParam.name().c_str()); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0)); softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseEltwiseLayer(const LayerParameter& layerParam) { ValidateNumInputsOutputs(layerParam, 2, 1); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); // Ignored Caffe Parameters: // coeff EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // Defaults to sum as per caffe. if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation()) { operation = layerParam.eltwise_param().operation(); } armnn::IConnectableLayer* newLayer = nullptr; switch (operation) { case EltwiseParameter_EltwiseOp_SUM: { newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str()); break; } case EltwiseParameter_EltwiseOp_PROD: { newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str()); break; } default: { throw ParseException( boost::str( boost::format( "Unsupported operation %1% in Eltwise layer %2% %3%") % operation % layerParam.name() % CHECK_LOCATION().AsString())); } } GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0)); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1)); newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseConcatLayer(const LayerParameter& layerParam) { unsigned int numInputs = static_cast(layerParam.bottom_size()); // We assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3). unsigned int concatDim = 1; unsigned int numOfDims = 4; // we only consider 4-D tensor here OriginsDescriptor concatDescriptor(static_cast(numInputs), numOfDims); std::vectormergeDimSizes(numOfDims, 0u); unsigned int mergeDim = 0; for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex) { const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop( layerParam.bottom(boost::numeric_cast(viewIndex))).GetTensorInfo(); // Checks whether the dimensions of the input tensors are actually 4. if (inputInfo.GetNumDimensions()!=4) { throw ParseException( boost::str( boost::format( "The number of dimensions for input tensors of " "the concatenation op should be 4. Inputs of %1% has " "%2% dimensions. %3%") % layerParam.name() % inputInfo.GetNumDimensions() % CHECK_LOCATION().AsString())); } mergeDimSizes[0] = inputInfo.GetShape()[0]; mergeDimSizes[1] = inputInfo.GetShape()[1]; mergeDimSizes[2] = inputInfo.GetShape()[2]; mergeDimSizes[3] = inputInfo.GetShape()[3]; for (unsigned int j = 0; j < concatDim; ++j) { concatDescriptor.SetViewOriginCoord(viewIndex, j, 0); } concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim); mergeDim += mergeDimSizes[concatDim]; for (unsigned int j = concatDim+1; j < numOfDims; ++j) { concatDescriptor.SetViewOriginCoord(viewIndex, j, 0); } } mergeDimSizes[concatDim] = mergeDim; armnn::IConnectableLayer* concatlayer = m_Network->AddConcatLayer(concatDescriptor, layerParam.name().c_str()); for (unsigned int i = 0; i < numInputs; ++i) { armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(boost::numeric_cast(i))); outputSlot.Connect(concatlayer->GetInputSlot(i)); } concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32)); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0)); } void CaffeParserBase::ParseBatchNormLayer(const LayerParameter& layerParam) { ValidateNumInputsOutputs(layerParam, 1, 1); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); string name = layerParam.name(); BatchNormParameter param = layerParam.batch_norm_param(); // If use_global_stats is not explicitly set in the model, assume it to be true (its default value // when the network is in the testing phase). if (param.has_use_global_stats()) { if (!param.use_global_stats()) { throw ParseException( boost::str( boost::format( "Error parsing Batch Norm layer '%1%': " "Parameter 'use_global_stats' is set to false, which is " "unsupported (value used for training). %2%") % name % CHECK_LOCATION().AsString())); } } BatchNormalizationDescriptor desc; desc.m_Eps = param.eps(); unsigned int channels = inputInfo.GetShape()[1]; unsigned int shape[] = {channels}; vector meanData(channels); GetDataFromBlob(layerParam, meanData, 0); vector varianceData(channels); GetDataFromBlob(layerParam, varianceData, 1); // Reads moving average factor and applies scaling (if required). const BlobProto& blob = layerParam.blobs(boost::numeric_cast(2)); const float movingAverageFactor = blob.data(boost::numeric_cast(0)); if(movingAverageFactor != 0.0f) { const float scaleFactor = 1.0f / movingAverageFactor; auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; }; std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction); std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction); } // Identifies scale operation. vector betaData(channels, 0.0f); vector gammaData(channels, 1.0f); ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData); ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData); ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData); ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData); armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, name.c_str()); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0)); batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseScaleLayer(const LayerParameter& layerParam) { // Current unoptimal solution: add a batchnormalization layer with 0 mean and 1 variance. ValidateNumInputsOutputs(layerParam, 1, 1); const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo(); string name = layerParam.name(); ScaleParameter param = layerParam.scale_param(); if (param.axis() != 1) { // Would have to use something other than BatchNormalizationLayer in this case throw ParseException( boost::str( boost::format( "Loading Scale Layer: Only axis 1 is supported currently. " "Layer=%1% Axis=%2% %3%") % layerParam.name() % param.axis() % CHECK_LOCATION().AsString())); } unsigned int channels = inputInfo.GetShape()[1]; unsigned int shape[] = {channels}; BatchNormalizationDescriptor desc; desc.m_Eps = 0.0f; // Don't need epsilon if variance is 1. vector meanData(channels, 0.0f); vector varianceData(channels, 1.0f); vector betaData(channels, 0.0f); vector gammaData(channels); GetDataFromBlob(layerParam, gammaData, 0); if(param.has_bias_term()) { GetDataFromBlob(layerParam, betaData, 1); } ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData); ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData); ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData); ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData); armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, name.c_str()); GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0)); batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0)); } void CaffeParserBase::ParseSplitLayer(const caffe::LayerParameter& layerParam) { // Used in caffe to duplicate memory - not necessary in armnn. if (layerParam.bottom_size() != 1) { throw ParseException( boost::str( boost::format( "Split layer '%1%' should have exactly 1 bottom. " "#bottoms=%2% %3%") % layerParam.name() % layerParam.bottom_size() % CHECK_LOCATION().AsString())); } armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)); for (int i = 0; i < layerParam.top_size(); i++) { SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot); } } void CaffeParserBase::ParseDropoutLayer(const caffe::LayerParameter& layerParam) { // Ignored for inference, so patch the single input to its single output. if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1) { throw ParseException( boost::str( boost::format( "Dropout layer '%1%' should have exactly 1 bottom and 1 top. " "#bottoms=%2% #tops=%3% %4%") % layerParam.name() % layerParam.bottom_size() % layerParam.top_size() % CHECK_LOCATION().AsString())); } SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0))); } void CaffeParserBase::TrackInputBinding(armnn::IConnectableLayer* layer, armnn::LayerBindingId id, const armnn::TensorInfo& tensorInfo) { return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo); } void CaffeParserBase::TrackOutputBinding(armnn::IConnectableLayer* layer, armnn::LayerBindingId id, const armnn::TensorInfo& tensorInfo) { return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo); } void CaffeParserBase::TrackBindingPoint(armnn::IConnectableLayer* layer, armnn::LayerBindingId id, const armnn::TensorInfo& tensorInfo, const char* bindingPointDesc, std::unordered_map& nameToBindingInfo) { const std::string layerName = layer->GetName(); auto it = nameToBindingInfo.find(layerName); if (it == nameToBindingInfo.end()) { nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo); } else { throw ParseException( boost::str( boost::format( "Id %1% used by more than one %2% layer %3%") % id % bindingPointDesc % CHECK_LOCATION().AsString())); } } armnn::IOutputSlot& CaffeParserBase::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const { auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName); if (it != m_ArmnnOutputSlotForCaffeTop.end()) { return *it->second; } else { throw ParseException( boost::str( boost::format( "Could not find armnn output slot for Caffe top '%1%' %2%") % caffeTopName % CHECK_LOCATION().AsString())); } } void CaffeParserBase::SetArmnnOutputSlotForCaffeTop( const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot) { auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName); if (it == m_ArmnnOutputSlotForCaffeTop.end()) { m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot; } else { throw ParseException( boost::str( boost::format( "Attempting to add duplicate entry for Caffe top '%1%' %2%") % caffeTopName % CHECK_LOCATION().AsString())); } } // Note: can move to CaffeParser when/if we optimise the text/string format // to load on a layer by layer basis void CaffeParserBase::ResolveInPlaceLayers(caffe::NetParameter& netParameter) { // Finds layers with the same top. std::map> layersByTop; for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx) { caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx); std::string name = layer.name(); for (int i = 0; i < layer.top_size(); ++i) { layersByTop[layer.top(i)].push_back(&layer); } } // For each set of layers with the same top, resolves them to a linear chain rather than in-place layers. // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op. for (auto layersWithSameTopIt : layersByTop) { const std::string& top = layersWithSameTopIt.first; const std::vector& layersWithSameTop = layersWithSameTopIt.second; // Chains the layers together in the order that they are listed in the prototxt (hopefully this is correct). // Note that the last layer will not have its top modified so that other layers will continue to reference it. for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx) { caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx]; caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx+1]; if (layer1.top_size() != 1) { throw ParseException( boost::str( boost::format( "Node '%1%' is an in-place layer but doesn't have exactly one " "top. It has %2% instead. %3%") % layer1.name() % layer1.top_size() % CHECK_LOCATION().AsString())); } std::string newTop = layer1.name() + "_top"; layer1.set_top(0, newTop); if (layer2.bottom_size() != 1 || layer2.bottom(0) != top) { throw ParseException( boost::str( boost::format( "Node '%1%' is an in-place layer but " "doesn't have exactly one bottom, or it doesn't match its top. " "#bottoms=%2%, first bottom is %3%, top is %4% %5%") % layer2.name() % layer2.bottom(0) % top % CHECK_LOCATION().AsString())); } layer2.set_bottom(0, newTop); } } } // Note: can move to CaffeParser when/if we optimise the text/string format // to load on a layer by layer basis void CaffeParserBase::LoadNetParam(NetParameter& netParameter) { // Caffe models sometimes have an implicit input layer. // In that case, add an explicit one. if (netParameter.input_size() > 0) { LayerParameter* newLayer = netParameter.add_layer(); newLayer->set_type("Input"); newLayer->set_name(netParameter.input(0)); newLayer->add_top(netParameter.input(0)); InputParameter* inputParam = newLayer->mutable_input_param(); BlobShape* shape = inputParam->add_shape(); int dim_size = netParameter.input_dim_size(); for (int i = 0; i < dim_size; ++i) { shape->add_dim(netParameter.input_dim(i)); } } // Replaces in-place layers with regular ones to make the rest of the parsing easier. ResolveInPlaceLayers(netParameter); // Creates a lookup of Caffe layers by name. for (int i = 0; i < netParameter.layer_size(); ++i) { const caffe::LayerParameter& layer = netParameter.layer(i); for (int i = 0; i < layer.top_size(); ++i) { m_CaffeLayersByTopName[layer.top(i)] = &layer; } } // Finds the output layers the user requested. std::vector targetLayers; for (const std::string& requestedOutputName : m_RequestedOutputs) { auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName); if (nodeIt == m_CaffeLayersByTopName.end()) { throw ParseException( boost::str( boost::format( "Couldn't find requested output layer '%1%' in graph %2%") % requestedOutputName % CHECK_LOCATION().AsString())); } targetLayers.push_back(nodeIt->second); } // Sorts them into a linear ordering such that all inputs of a node are before the node itself. std::vector sortedNodes; if (!armnnUtils::GraphTopologicalSort( targetLayers, [this](const caffe::LayerParameter* node) { return GetInputs(*node); }, sortedNodes)) { throw ParseException( boost::str( boost::format( "Cycle detected in graph. #nodes: %1% %2%") % sortedNodes.size() % CHECK_LOCATION().AsString())); } // Parses each node in order, knowing that all inputs of a node will be processed before the node itself. for (const caffe::LayerParameter* current : sortedNodes) { auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type()); if (it == ms_CaffeLayerNameToParsingFunctions.end()) { throw ParseException( boost::str( boost::format("Unsupported layer type: '%1%' for layer %2% %3%") % current->type() % current->name() % CHECK_LOCATION().AsString())); } auto func = it->second; (this->*func)(*current); } // Adds ArmNN output layers connected to each requested output. for (const std::string& requestedOutput : m_RequestedOutputs) { armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput); const armnn::LayerBindingId outputId = boost::numeric_cast( m_NetworkOutputsBindingInfo.size()); armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str()); outputSlot.Connect(outputLayer->GetInputSlot(0)); TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo()); } } INetworkPtr CaffeParserBase::CreateNetworkFromTextFile(const char* graphFile, const std::map& inputShapes, const std::vector& requestedOutputs) { FILE* fd = fopen(graphFile, "r"); if (fd == nullptr) { throw FileNotFoundException( boost::str( boost::format( "Failed to open graph file: %1% %2%") % graphFile % CHECK_LOCATION().AsString())); } // Parses the file into a message. NetParameter netParam; auto input = new google::protobuf::io::FileInputStream(fileno(fd)); bool success = google::protobuf::TextFormat::Parse(input, &netParam); delete input; fclose(fd); if (!success) { throw ParseException( boost::str( boost::format( "Failed to parse graph file: %1% %2%") % graphFile % CHECK_LOCATION().AsString())); } return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs); } INetworkPtr CaffeParserBase::CreateNetworkFromString(const char* protoText, const std::map& inputShapes, const std::vector& requestedOutputs) { // Parses the string into a message. NetParameter netParam; bool success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam); if (!success) { throw ParseException( boost::str( boost::format( "Failed to parse graph string %1%") % CHECK_LOCATION().AsString())); } return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs); } INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile, const std::map& inputShapes, const std::vector& requestedOutputs) { FILE* fd = fopen(graphFile, "rb"); if (fd == nullptr) { throw FileNotFoundException( boost::str( boost::format( "Failed to open graph file at: %1% %2%") % graphFile % CHECK_LOCATION().AsString())); } // Parses the file into a message. NetParameter netParam; FileInputStream inStream(fileno(fd)); CodedInputStream codedStream(&inStream); codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX); bool success = netParam.ParseFromCodedStream(&codedStream); fclose(fd); if (!success) { throw ParseException( boost::str( boost::format( "Failed to parse protobuf file: %1% %2%") % graphFile % CHECK_LOCATION().AsString())); } return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs); } // Note: can move to CaffeParser when/if we optimise the text/string format // to load on a layer by layer basis INetworkPtr CaffeParserBase::CreateNetworkFromNetParameter(NetParameter& netParam, const std::map& inputShapes, const std::vector& requestedOutputs) { m_NetworkInputsBindingInfo.clear(); m_NetworkOutputsBindingInfo.clear(); m_Network = INetwork::Create(); m_InputShapes = inputShapes; if (requestedOutputs.size() == 0) { throw ParseException("requestedOutputs must have at least one entry"); } m_RequestedOutputs = requestedOutputs; try { LoadNetParam(netParam); } catch (const ParseException& e) { Cleanup(); throw e; } Cleanup(); return move(m_Network); } void CaffeParserBase::Cleanup() { // cleanup, in case we reuse this parser m_InputShapes.clear(); m_RequestedOutputs.clear(); m_ArmnnOutputSlotForCaffeTop.clear(); // NOTE: when we get the text/string format // optimised for memory then this data structure can // also move to the CaffeParser class m_CaffeLayersByTopName.clear(); } }