From e2220551b7a64b929650ba9a60529c31e70c13c5 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 20 Jul 2018 13:23:44 +0100 Subject: COMPMID-1367: Enable NHWC in graph examples Change-Id: Iabc54a3a1bdcd46a9a921cda39c7c85fef672b72 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141449 Reviewed-by: Giorgio Arena Reviewed-by: Anthony Barbier Tested-by: Jenkins --- .../core/CL/kernels/CLNormalizationLayerKernel.h | 2 +- arm_compute/core/Helpers.h | 60 +++++--- arm_compute/core/utils/misc/ShapeCalculator.h | 30 ++-- arm_compute/graph/GraphBuilder.h | 9 +- arm_compute/graph/INodeVisitor.h | 16 +- arm_compute/graph/TypePrinter.h | 44 +++--- arm_compute/graph/Types.h | 24 +-- arm_compute/graph/backends/FunctionHelpers.h | 116 ++++++++------- arm_compute/graph/backends/ValidateHelpers.h | 16 +- arm_compute/graph/frontend/Layers.h | 6 +- arm_compute/graph/frontend/Types.h | 6 +- arm_compute/graph/nodes/ConcatenateLayerNode.h | 84 +++++++++++ arm_compute/graph/nodes/ConvolutionLayerNode.h | 6 +- .../graph/nodes/DepthConcatenateLayerNode.h | 77 ---------- .../graph/nodes/DepthwiseConvolutionLayerNode.h | 2 +- arm_compute/graph/nodes/Nodes.h | 2 +- arm_compute/graph/nodes/NodesFwd.h | 2 +- arm_compute/graph/printers/DotGraphPrinter.h | 2 +- examples/graph_alexnet.cpp | 27 ++-- examples/graph_googlenet.cpp | 51 ++++--- examples/graph_inception_v3.cpp | 129 ++++++++-------- examples/graph_inception_v4.cpp | 163 +++++++++++---------- examples/graph_mobilenet.cpp | 2 +- examples/graph_resnet50.cpp | 35 +++-- examples/graph_resnext50.cpp | 32 ++-- examples/graph_squeezenet.cpp | 54 ++++--- examples/graph_squeezenet_v1_1.cpp | 58 ++++---- src/core/CL/cl_kernels/pooling_layer.cl | 4 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 1 - src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 22 +-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 4 +- .../NEON/kernels/NENormalizationLayerKernel.cpp | 50 ++++--- src/graph/GraphBuilder.cpp | 18 ++- src/graph/backends/CL/CLFunctionsFactory.cpp | 4 +- src/graph/backends/GLES/GCFunctionsFactory.cpp | 50 ++++++- src/graph/backends/GLES/GCNodeValidator.cpp | 6 +- src/graph/backends/NEON/NEFunctionFactory.cpp | 8 +- src/graph/mutators/DepthConcatSubTensorMutator.cpp | 14 +- src/graph/nodes/ConcatenateLayerNode.cpp | 141 ++++++++++++++++++ src/graph/nodes/DepthConcatenateLayerNode.cpp | 125 ---------------- src/graph/printers/DotGraphPrinter.cpp | 10 +- utils/CommonGraphOptions.cpp | 6 +- utils/CommonGraphOptions.h | 2 +- utils/TypePrinter.h | 29 ++++ 44 files changed, 868 insertions(+), 681 deletions(-) create mode 100644 arm_compute/graph/nodes/ConcatenateLayerNode.h delete mode 100644 arm_compute/graph/nodes/DepthConcatenateLayerNode.h create mode 100644 src/graph/nodes/ConcatenateLayerNode.cpp delete mode 100644 src/graph/nodes/DepthConcatenateLayerNode.cpp diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h index f2d37a781c..beeb8b838e 100644 --- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h @@ -72,7 +72,7 @@ private: const ICLTensor *_input; ICLTensor *_output; BorderSize _border_size; - bool _is_in_map; + bool _is_norm_across_width; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 374e36442b..ef59323073 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -111,28 +111,6 @@ struct 
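The _is_in_map to _is_norm_across_width rename above, and the get_normalization_dimension_index() helper added to Helpers.h in the hunk below, come from the same layout issue: once NHWC is allowed, the dimension that IN_MAP (spatial) normalization slides over is the width axis, whose index depends on the data layout, while CROSS_MAP normalization slides over the channel axis. The following is a minimal standalone sketch of that mapping in plain C++; Layout, NormType and norm_dimension are illustrative names rather than the library's types, and the index values follow arm_compute's x-fastest ordering (NCHW: W=0, H=1, C=2; NHWC: C=0, W=1, H=2).

#include <cstdio>

enum class Layout { NCHW, NHWC };
enum class NormType { IN_MAP_1D, CROSS_MAP };

// Returns the tensor dimension index the normalization window slides over.
static unsigned int norm_dimension(Layout layout, NormType type)
{
    const unsigned int width_idx   = (layout == Layout::NCHW) ? 0u : 1u;
    const unsigned int channel_idx = (layout == Layout::NCHW) ? 2u : 0u;
    return (type == NormType::CROSS_MAP) ? channel_idx : width_idx;
}

int main()
{
    // IN_MAP normalization runs over dimension 0 under NCHW but dimension 1 under NHWC,
    // so a plain "is in map" flag is no longer enough to pick the axis.
    std::printf("IN_MAP:    NCHW -> dim %u, NHWC -> dim %u\n",
                norm_dimension(Layout::NCHW, NormType::IN_MAP_1D),
                norm_dimension(Layout::NHWC, NormType::IN_MAP_1D));
    std::printf("CROSS_MAP: NCHW -> dim %u, NHWC -> dim %u\n",
                norm_dimension(Layout::NCHW, NormType::CROSS_MAP),
                norm_dimension(Layout::NHWC, NormType::CROSS_MAP));
    return 0;
}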
is_contained> : is_contained }; } -/** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform - * to know the number of tiles on the x and y direction - * - * @param[in] in_dims Spatial dimensions of the input tensor of convolution layer - * @param[in] kernel_size Kernel size - * @param[in] output_tile_size Size of a single output tile - * @param[in] conv_info Convolution info (i.e. pad, stride,...) - * - * @return the number of output tiles along the x and y directions of size "output_tile_size" - */ -inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info) -{ - int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast(output_tile_size.width)); - int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast(output_tile_size.height)); - - // Clamp in case we provide paddings but we have 1D convolution - num_tiles_x = std::min(num_tiles_x, static_cast(in_dims.width)); - num_tiles_y = std::min(num_tiles_y, static_cast(in_dims.height)); - - return Size2D(num_tiles_x, num_tiles_y); -} - /** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. * @@ -694,6 +672,44 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord); * @return The int conversion of the requested data layout index. */ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension); + +/** Calculate the normalization dimension index for a given normalization type + * + * @param[in] layout Data layout of the input and output tensor + * @param[in] info Normalization info + * + * @return Normalization dimension index + */ +inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) +{ + const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); + const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); + + return info.is_in_map() ? width_idx : channel_idx; +} + +/** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform + * to know the number of tiles on the x and y direction + * + * @param[in] in_dims Spatial dimensions of the input tensor of convolution layer + * @param[in] kernel_size Kernel size + * @param[in] output_tile_size Size of a single output tile + * @param[in] conv_info Convolution info (i.e. pad, stride,...) 
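compute_winograd_convolution_tiles() (removed above and re-added just below) appears to be a straight move within the header; its tile-count formula is unchanged. The following is a small self-contained check of that formula, with plain C++ ints standing in for Size2D and PadStrideInfo: for an F(2x2, 3x3) Winograd kernel each output tile covers a 2x2 patch of the output map, so the tile count per axis is the convolution output size divided by 2, rounded up.

#include <algorithm>
#include <cassert>
#include <cmath>

static int winograd_tiles(int in_dim, int kernel_dim, int tile_dim, int pad_before, int pad_after)
{
    // Mirrors the formula in the header: ceil((in - (k - 1) + pads) / tile), clamped for 1D convolutions.
    const int tiles = static_cast<int>(std::ceil((in_dim - (kernel_dim - 1) + pad_before + pad_after) / static_cast<float>(tile_dim)));
    return std::min(tiles, in_dim);
}

int main()
{
    // 14x14 input, 3x3 kernel, 2x2 output tiles, stride 1:
    assert(winograd_tiles(14, 3, 2, 0, 0) == 6); // valid padding: 12x12 output -> 6 tiles per axis
    assert(winograd_tiles(14, 3, 2, 1, 1) == 7); // same padding: 14x14 output -> 7 tiles per axis
    return 0;
}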
+ * + * @return the number of output tiles along the x and y directions of size "output_tile_size" + */ +inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info) +{ + int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast(output_tile_size.width)); + int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast(output_tile_size.height)); + + // Clamp in case we provide paddings but we have 1D convolution + num_tiles_x = std::min(num_tiles_x, static_cast(in_dims.width)); + num_tiles_y = std::min(num_tiles_y, static_cast(in_dims.height)); + + return Size2D(num_tiles_x, num_tiles_y); +} + } // namespace arm_compute #include "arm_compute/core/Helpers.inl" diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index e5516ba154..dbf26a423d 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -201,15 +201,8 @@ inline TensorShape compute_im2col_fc_shape(const ITensorInfo *input, const int n inline TensorShape compute_im2col_flatten_shape(const ITensorInfo *input) { // The output shape will be the flatten version of the input (i.e. [ width * height * channels, 1, 1, ... ] ). Used for FlattenLayer. - - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 3); - TensorShape output_shape{ input->tensor_shape() }; - - const size_t flatten_shape = input->dimension(0) * input->dimension(1) * input->dimension(2); - output_shape.set(0, flatten_shape); - output_shape.remove_dimension(1); - output_shape.remove_dimension(1); + output_shape.collapse(3, 0); return output_shape; } @@ -403,20 +396,25 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo } template -inline TensorShape get_shape_from_info(T *info) +inline TensorShape extract_shape(T *data) +{ + return data->info()->tensor_shape(); +} + +inline TensorShape extract_shape(ITensorInfo *data) { - return info->info()->tensor_shape(); + return data->tensor_shape(); } -inline TensorShape get_shape_from_info(ITensorInfo *info) +inline TensorShape extract_shape(const TensorShape *data) { - return info->tensor_shape(); + return *data; } template inline TensorShape calculate_depth_concatenate_shape(const std::vector &inputs_vector) { - TensorShape out_shape = get_shape_from_info(inputs_vector[0]); + TensorShape out_shape = extract_shape(inputs_vector[0]); size_t max_x = 0; size_t max_y = 0; @@ -425,7 +423,7 @@ inline TensorShape calculate_depth_concatenate_shape(const std::vector &inp for(const auto &tensor : inputs_vector) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); - const TensorShape shape = get_shape_from_info(tensor); + const TensorShape shape = extract_shape(tensor); max_x = std::max(shape.x(), max_x); max_y = std::max(shape.y(), max_y); depth += shape.z(); @@ -441,13 +439,13 @@ inline TensorShape calculate_depth_concatenate_shape(const std::vector &inp template inline TensorShape calculate_width_concatenate_shape(const std::vector &inputs_vector) { - TensorShape out_shape = get_shape_from_info(inputs_vector[0]); + TensorShape out_shape = extract_shape(inputs_vector[0]); size_t width = 0; for(const auto &tensor : inputs_vector) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); - const TensorShape shape = get_shape_from_info(tensor); + const TensorShape shape = 
extract_shape(tensor); width += shape.x(); } diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h index 6359e05a63..191848c15f 100644 --- a/arm_compute/graph/GraphBuilder.h +++ b/arm_compute/graph/GraphBuilder.h @@ -128,8 +128,8 @@ public: * @return Node ID of the created node, EmptyNodeID in case of error */ static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, - Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info, - unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED, + Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info, unsigned int num_groups = 1, + ConvolutionMethod method = ConvolutionMethod::Default, FastMathHint fast_math_hint = FastMathHint::Disabled, ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo weights_quant_info = QuantizationInfo(), const QuantizationInfo out_quant_info = QuantizationInfo()); @@ -155,10 +155,11 @@ public: * @param[in] g Graph to add the node to * @param[in] params Common node parameters * @param[in] inputs Inputs to the depth concatenate layer node as a NodeID-Index pair + * @param[in] axis Concatenation axis * * @return Node ID of the created node, EmptyNodeID in case of error */ - static NodeID add_depth_concatenate_node(Graph &g, NodeParams params, std::vector inputs); + static NodeID add_concatenate_node(Graph &g, NodeParams params, std::vector inputs, DataLayoutDimension axis); /** Adds a depth-wise convolution layer node to the graph * * @param[in] g Graph to add the node to @@ -175,7 +176,7 @@ public: */ static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, PadStrideInfo conv_info, - DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT, + DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default, ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo()); /** Adds an element-wise layer node to the graph * diff --git a/arm_compute/graph/INodeVisitor.h b/arm_compute/graph/INodeVisitor.h index b5446c4a55..ad390ad760 100644 --- a/arm_compute/graph/INodeVisitor.h +++ b/arm_compute/graph/INodeVisitor.h @@ -51,6 +51,11 @@ public: * @param[in] n Node to visit. */ virtual void visit(BatchNormalizationLayerNode &n) = 0; + /** Visit ConcatenateLayerNode. + * + * @param[in] n Node to visit. + */ + virtual void visit(ConcatenateLayerNode &n) = 0; /** Visit ConstNode. * * @param[in] n Node to visit. @@ -61,11 +66,6 @@ public: * @param[in] n Node to visit. */ virtual void visit(ConvolutionLayerNode &n) = 0; - /** Visit DepthConcatenateLayerNode. - * - * @param[in] n Node to visit. - */ - virtual void visit(DepthConcatenateLayerNode &n) = 0; /** Visit DepthwiseConvolutionLayerNode. * * @param[in] n Node to visit. 
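calculate_depth_concatenate_shape() above (now accepting tensors, ITensorInfo or raw TensorShape through the extract_shape() overloads) keeps the largest spatial extents of the inputs and sums their depths. The following is a standalone sketch of that rule in plain C++, with shapes written x-fastest as {width, height, depth} to match TensorShape ordering; the numeric example uses the branch widths of a GoogLeNet inception_3a block.

#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <vector>

using Shape3D = std::array<std::size_t, 3>; // {width, height, depth}

static Shape3D depth_concat_shape(const std::vector<Shape3D> &inputs)
{
    Shape3D out{ 0, 0, 0 };
    for(const auto &s : inputs)
    {
        out[0] = std::max(out[0], s[0]); // width  : max over inputs
        out[1] = std::max(out[1], s[1]); // height : max over inputs
        out[2] += s[2];                  // depth  : sum over inputs
    }
    return out;
}

int main()
{
    // Four inception branches, all 28x28 spatially, with 64/128/32/32 feature maps:
    const Shape3D out = depth_concat_shape({ { 28, 28, 64 }, { 28, 28, 128 }, { 28, 28, 32 }, { 28, 28, 32 } });
    assert((out == Shape3D{ 28, 28, 256 }));
    return 0;
}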
@@ -148,15 +148,15 @@ public: { default_visit(); } - virtual void visit(ConstNode &n) override + virtual void visit(ConcatenateLayerNode &n) override { default_visit(); } - virtual void visit(ConvolutionLayerNode &n) override + virtual void visit(ConstNode &n) override { default_visit(); } - virtual void visit(DepthConcatenateLayerNode &n) override + virtual void visit(ConvolutionLayerNode &n) override { default_visit(); } diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h index c3601f2373..7c0bd8cfdd 100644 --- a/arm_compute/graph/TypePrinter.h +++ b/arm_compute/graph/TypePrinter.h @@ -71,15 +71,15 @@ inline ::std::ostream &operator<<(::std::ostream &os, const NodeType &node_type) case NodeType::ChannelShuffleLayer: os << "ChannelShuffleLayer"; break; + case NodeType::ConcatenateLayer: + os << "ConcatenateLayer"; + break; case NodeType::ConvolutionLayer: os << "ConvolutionLayer"; break; case NodeType::DeconvolutionLayer: os << "DeconvolutionLayer"; break; - case NodeType::DepthConcatenateLayer: - os << "DepthConcatenateLayer"; - break; case NodeType::DepthwiseConvolutionLayer: os << "DepthwiseConvolutionLayer"; break; @@ -134,14 +134,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const EltwiseOperation &el { switch(eltwise_op) { - case EltwiseOperation::ADD: - os << "ADD"; + case EltwiseOperation::Add: + os << "Add"; break; - case EltwiseOperation::MUL: - os << "MUL"; + case EltwiseOperation::Mul: + os << "Mul"; break; - case EltwiseOperation::SUB: - os << "SUB"; + case EltwiseOperation::Sub: + os << "Sub"; break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); @@ -155,17 +155,17 @@ inline ::std::ostream &operator<<(::std::ostream &os, const ConvolutionMethod &m { switch(method) { - case ConvolutionMethod::DEFAULT: - os << "DEFAULT"; + case ConvolutionMethod::Default: + os << "Default"; break; - case ConvolutionMethod::DIRECT: - os << "DIRECT"; + case ConvolutionMethod::Direct: + os << "Direct"; break; case ConvolutionMethod::GEMM: os << "GEMM"; break; - case ConvolutionMethod::WINOGRAD: - os << "WINOGRAD"; + case ConvolutionMethod::Winograd: + os << "Winograd"; break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); @@ -179,11 +179,11 @@ inline ::std::ostream &operator<<(::std::ostream &os, const FastMathHint &hint) { switch(hint) { - case FastMathHint::ENABLED: - os << "ENABLED"; + case FastMathHint::Enabled: + os << "Enabled"; break; - case FastMathHint::DISABLED: - os << "DISABLED"; + case FastMathHint::Disabled: + os << "Disabled"; break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); @@ -197,14 +197,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DepthwiseConvolution { switch(method) { - case DepthwiseConvolutionMethod::DEFAULT: + case DepthwiseConvolutionMethod::Default: os << "DEFAULT"; break; case DepthwiseConvolutionMethod::GEMV: os << "GEMV"; break; - case DepthwiseConvolutionMethod::OPTIMIZED_3x3: - os << "OPTIMIZED_3x3"; + case DepthwiseConvolutionMethod::Optimized3x3: + os << "Optimized3x3"; break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index c5b7fb1c51..f22f50ac82 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -96,33 +96,33 @@ enum class Target /** Supported Element-wise operations */ enum class EltwiseOperation { - ADD, /**< Arithmetic addition */ - SUB, /**< Arithmetic subtraction */ - MUL /**< Arithmetic multiplication */ + Add, /**< Arithmetic addition */ + Sub, /**< Arithmetic subtraction */ + Mul /**< Arithmetic 
multiplication */ }; /** Supported Convolution layer methods */ enum class ConvolutionMethod { - DEFAULT, /**< Default approach using internal heuristics */ + Default, /**< Default approach using internal heuristics */ GEMM, /**< GEMM based convolution */ - DIRECT, /**< Deep direct convolution */ - WINOGRAD /**< Winograd based convolution */ + Direct, /**< Deep direct convolution */ + Winograd /**< Winograd based convolution */ }; /** Supported Depthwise Convolution layer methods */ enum class DepthwiseConvolutionMethod { - DEFAULT, /**< Default approach using internal heuristics */ - GEMV, /**< Generic GEMV based depthwise convolution */ - OPTIMIZED_3x3, /**< Optimized 3x3 direct depthwise convolution */ + Default, /**< Default approach using internal heuristics */ + GEMV, /**< Generic GEMV based depthwise convolution */ + Optimized3x3, /**< Optimized 3x3 direct depthwise convolution */ }; /** Enable or disable fast math for Convolution layer */ enum class FastMathHint { - ENABLED, /**< Fast math enabled for Convolution layer */ - DISABLED, /**< Fast math disabled for Convolution layer */ + Enabled, /**< Fast math enabled for Convolution layer */ + Disabled, /**< Fast math disabled for Convolution layer */ }; /** Supported nodes */ @@ -131,9 +131,9 @@ enum class NodeType ActivationLayer, BatchNormalizationLayer, ChannelShuffleLayer, + ConcatenateLayer, ConvolutionLayer, DeconvolutionLayer, - DepthConcatenateLayer, DepthwiseConvolutionLayer, EltwiseLayer, FlattenLayer, diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index 978d3bc1a8..172f00277e 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -192,6 +192,52 @@ std::unique_ptr create_channel_shuffle_layer(ChannelShuffleLayerNode return std::move(func); } +/** Create a backend layer concatenate function + * + * @tparam ConcatenateLayerFunction Backend concatenate function + * @tparam TargetInfo Target-specific information + * + * @param[in] node Node to create the backend function for + * + * @return Backend concatenate layer function + */ +template +std::unique_ptr create_concatenate_layer(ConcatenateLayerNode &node) +{ + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl); + ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); + + // Return nullptr if depth concatenate is switched off + if(!node.is_enabled()) + { + return nullptr; + } + + // Extract IO and info + std::vector inputs; + for(unsigned int i = 0; i < node.num_inputs(); ++i) + { + inputs.push_back(get_backing_tensor(node.input(i))); + } + typename TargetInfo::TensorType *output = get_backing_tensor(node.output(0)); + const DataLayoutDimension concat_axis = node.concatenation_axis(); + + // Create and configure function + auto func = support::cpp14::make_unique(); + func->configure(inputs, output, concat_axis); + + // Log info + ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() + << " Target " << TargetInfo::TargetType + << " Data Type: " << output->info()->data_type() + << " Shape: " << output->info()->tensor_shape() + << " Num Inputs: " << inputs.size() + << " Axis: " << concat_axis + << std::endl); + + return std::move(func); +} + /** Create a backend convolution layer function * * @tparam ConvolutionLayerFunctions Backend convolution functions @@ -220,20 +266,20 @@ std::unique_ptr create_convolution_layer(ConvolutionLayerNode &node, const PadStrideInfo conv_info = 
node.convolution_info(); const ConvolutionMethod conv_algorithm = node.convolution_method(); - const bool fast_math = node.fast_math_hint() == FastMathHint::ENABLED; + const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled; // Create and configure function (we assume that functions have been validated before creation) std::shared_ptr mm = get_memory_manager(ctx, TargetInfo::TargetType); std::unique_ptr func; std::string func_name; - if(conv_algorithm == ConvolutionMethod::WINOGRAD) + if(conv_algorithm == ConvolutionMethod::Winograd) { std::tie(func, func_name) = create_named_memory_managed_function( std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math); } - else if(conv_algorithm == ConvolutionMethod::DIRECT) + else if(conv_algorithm == ConvolutionMethod::Direct) { std::tie(func, func_name) = create_named_function( std::string("DirectConvolutionLayer"), @@ -308,50 +354,6 @@ std::unique_ptr create_deconvolution_layer(DeconvolutionLayerNode &no return func; } -/** Create a backend layer depth concatenate function - * - * @tparam DepthConcatenateLayerFunction Backend depth concatenate function - * @tparam TargetInfo Target-specific information - * - * @param[in] node Node to create the backend function for - * - * @return Backend depth concatenate layer function - */ -template -std::unique_ptr create_depth_concatenate_layer(DepthConcatenateLayerNode &node) -{ - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl); - ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); - - // Return nullptr if depth concatenate is switched off - if(!node.is_enabled()) - { - return nullptr; - } - - // Extract IO and info - std::vector inputs; - for(unsigned int i = 0; i < node.num_inputs(); ++i) - { - inputs.push_back(get_backing_tensor(node.input(i))); - } - typename TargetInfo::TensorType *output = get_backing_tensor(node.output(0)); - - // Create and configure function - auto func = support::cpp14::make_unique(); - func->configure(inputs, output); - - // Log info - ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() - << " Target " << TargetInfo::TargetType - << " Data Type: " << output->info()->data_type() - << " Shape: " << output->info()->tensor_shape() - << " Num Inputs: " << inputs.size() - << std::endl); - - return std::move(func); -} - /** Create a backend layer depth-wise convolution function * * @tparam DepthwiseConvolutionLayerFunctions Backend depthwise convolution function @@ -383,7 +385,7 @@ std::unique_ptr create_depthwise_convolution_layer(DepthwiseConvoluti // Create and configure function (we assume that functions have been validated before creation) std::unique_ptr func; std::string func_name; - if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3) + if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3) { std::tie(func, func_name) = create_named_function( std::string("DepthwiseConvolutionLayer3x3"), @@ -435,19 +437,19 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) std::unique_ptr func = nullptr; std::string func_name; - if(eltwise_op == EltwiseOperation::ADD) + if(eltwise_op == EltwiseOperation::Add) { std::tie(func, func_name) = create_named_function( std::string("ArithmeticAddition"), input1, input2, output, convert_policy); } - else if(eltwise_op == EltwiseOperation::SUB) + else if(eltwise_op == EltwiseOperation::Sub) { std::tie(func, func_name) = create_named_function( 
std::string("ArithmeticSubtraction"), input1, input2, output, convert_policy); } - else if(eltwise_op == EltwiseOperation::MUL) + else if(eltwise_op == EltwiseOperation::Mul) { std::tie(func, func_name) = create_named_function( std::string("PixelWiseMultiplication"), @@ -487,11 +489,12 @@ std::unique_ptr create_flatten_layer(FlattenLayerNode &node) typename TargetInfo::TensorType *input = get_backing_tensor(node.input(0)); typename TargetInfo::TensorType *output = get_backing_tensor(node.output(0)); + ARM_COMPUTE_ERROR_ON(input == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr); + // Create and configure function auto func = support::cpp14::make_unique(); func->configure(input, output); - ARM_COMPUTE_ERROR_ON(input == nullptr); - ARM_COMPUTE_ERROR_ON(output == nullptr); // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() @@ -526,13 +529,14 @@ std::unique_ptr create_fully_connected_layer(FullyConnectedLayerNode typename TargetInfo::TensorType *output = get_backing_tensor(node.output(0)); const FullyConnectedLayerInfo fc_info = node.info(); - // Create and configure function - auto func = support::cpp14::make_unique(get_memory_manager(ctx, TargetInfo::TargetType)); - func->configure(input, weights, biases, output, fc_info); ARM_COMPUTE_ERROR_ON(input == nullptr); ARM_COMPUTE_ERROR_ON(weights == nullptr); ARM_COMPUTE_ERROR_ON(output == nullptr); + // Create and configure function + auto func = support::cpp14::make_unique(get_memory_manager(ctx, TargetInfo::TargetType)); + func->configure(input, weights, biases, output, fc_info); + // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() << " Target " << TargetInfo::TargetType diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h index 189fbdc9c7..ae52593b03 100644 --- a/arm_compute/graph/backends/ValidateHelpers.h +++ b/arm_compute/graph/backends/ValidateHelpers.h @@ -106,22 +106,22 @@ Status validate_convolution_layer(ConvolutionLayerNode &node) const PadStrideInfo conv_info = node.convolution_info(); const ConvolutionMethod conv_algorithm = node.convolution_method(); - //const bool fast_math = node.fast_math_hint() == FastMathHint::ENABLED; // FIXME (COMPMID-1138): uncomment once NEON and GLES support fast_math + const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled; // Validate function Status status{}; switch(conv_algorithm) { - case ConvolutionMethod::DIRECT: + case ConvolutionMethod::Direct: status = DirectConvolutionLayer::validate(input, weights, biases, output, conv_info); break; case ConvolutionMethod::GEMM: status = GEMMConvolutionLayer::validate(input, weights, biases, output, conv_info); break; - case ConvolutionMethod::WINOGRAD: - status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info /*, fast_math*/); + case ConvolutionMethod::Winograd: + status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math); break; - case ConvolutionMethod::DEFAULT: + case ConvolutionMethod::Default: status = ConvolutionLayer::validate(input, weights, biases, output, conv_info); break; default: @@ -136,7 +136,7 @@ Status validate_convolution_layer(ConvolutionLayerNode &node) { ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " << node.id() << " and Name: " << node.name() << std::endl); - node.set_convolution_method(ConvolutionMethod::DEFAULT); + node.set_convolution_method(ConvolutionMethod::Default); } } @@ -166,11 +166,11 @@ Status 
validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node) // TODO (geopin01) : Switch when validation is implemented // Validate function - if((dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3)) + if((dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3)) { ARM_COMPUTE_LOG_GRAPH_INFO("Switched DepthwiseConvolutionLayer method of node with ID : " << node.id() << " and Name: " << node.name() << std::endl); - node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::DEFAULT); + node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); } return Status{}; diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h index 197d2ea409..02ef56952d 100644 --- a/arm_compute/graph/frontend/Layers.h +++ b/arm_compute/graph/frontend/Layers.h @@ -595,7 +595,7 @@ public: } else if(_branch_merge_method == BranchMergeMethod::DEPTH_CONCATENATE) { - // Collect tail nodes and perform DepthConcatenate + // Collect tail nodes and concatenate std::vector nodes; for(auto &ss : _sub_streams) { @@ -608,14 +608,14 @@ public: } } } - nid = GraphBuilder::add_depth_concatenate_node(s.graph(), common_params, nodes); + nid = GraphBuilder::add_concatenate_node(s.graph(), common_params, nodes, DataLayoutDimension::CHANNEL); } else { ARM_COMPUTE_ERROR_ON(_sub_streams.size() != 2); NodeIdxPair input0 = { _sub_streams[0]->tail_node(), 0 }; NodeIdxPair input1 = { _sub_streams[1]->tail_node(), 0 }; - nid = GraphBuilder::add_elementwise_node(s.graph(), common_params, input0, input1, EltwiseOperation::ADD); + nid = GraphBuilder::add_elementwise_node(s.graph(), common_params, input0, input1, EltwiseOperation::Add); } return nid; } diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h index cd579e2119..f9d4952765 100644 --- a/arm_compute/graph/frontend/Types.h +++ b/arm_compute/graph/frontend/Types.h @@ -64,9 +64,9 @@ enum class BranchMergeMethod struct StreamHints { Target target_hint = { Target::UNSPECIFIED }; /**< Target execution hint */ - ConvolutionMethod convolution_method_hint = { ConvolutionMethod::DEFAULT }; /**< Convolution method hint */ - DepthwiseConvolutionMethod depthwise_convolution_method_hint = { DepthwiseConvolutionMethod::DEFAULT }; /**< Depthwise Convolution method hint */ - FastMathHint fast_math_hint = { FastMathHint::DISABLED }; /**< Fast math hint */ + ConvolutionMethod convolution_method_hint = { ConvolutionMethod::Default }; /**< Convolution method hint */ + DepthwiseConvolutionMethod depthwise_convolution_method_hint = { DepthwiseConvolutionMethod::Default }; /**< Depthwise Convolution method hint */ + FastMathHint fast_math_hint = { FastMathHint::Disabled }; /**< Fast math hint */ }; } // namespace frontend } // namespace graph diff --git a/arm_compute/graph/nodes/ConcatenateLayerNode.h b/arm_compute/graph/nodes/ConcatenateLayerNode.h new file mode 100644 index 0000000000..20c8523752 --- /dev/null +++ b/arm_compute/graph/nodes/ConcatenateLayerNode.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__ +#define __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__ + +#include "arm_compute/graph/INode.h" + +namespace arm_compute +{ +namespace graph +{ +/** Concatenation Layer node */ +class ConcatenateLayerNode final : public INode +{ +public: + /** Constructor + * + * @param[in] total_nodes Number of nodes that will get concatenated + * @param[in] axis Concatenation axis + */ + ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis); + /** Computes concatenations output descriptor + * + * @param[in] input_descriptors Input descriptors + * @param[in] axis Concatenation axis + * + * @return Expected output descriptor + */ + static TensorDescriptor compute_output_descriptor(const std::vector &input_descriptors, DataLayoutDimension axis); + /** Disables or not the depth concatenate node + * + * @warning This is used when concatenate is performed using sub-tensors, where this node is used as a placeholder. + * + * @param[in] is_enabled If true a backend function is created to perform the concatenation (involves copying), + * while if false, no function is created and we assume that sub-tensors are properly set to simulate + * a zero copy operation. 
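The following is a sketch of the shape arithmetic compute_output_descriptor() is expected to perform for a given concatenation axis (the .cpp body is not shown in this excerpt, so this is an assumption): the extent along the axis is summed over the inputs and the remaining dimensions are taken from the first input. The axis index itself is the node's DataLayoutDimension resolved against the input layout, e.g. CHANNEL is dimension 2 of an x-fastest NCHW shape {W,H,C,N} but dimension 0 of an NHWC shape {C,W,H,N}, which is why the node stores a logical dimension rather than a raw index.

#include <array>
#include <cassert>
#include <cstddef>
#include <vector>

using Shape4D = std::array<std::size_t, 4>;

static Shape4D concat_shape(const std::vector<Shape4D> &inputs, std::size_t axis)
{
    Shape4D out = inputs.front();
    for(std::size_t i = 1; i < inputs.size(); ++i)
    {
        out[axis] += inputs[i][axis]; // sum along the concatenation axis only
    }
    return out;
}

int main()
{
    // Two 28x28 feature maps with 64 and 32 channels, expressed in each layout:
    const std::vector<Shape4D> nchw = { { 28, 28, 64, 1 }, { 28, 28, 32, 1 } }; // {W,H,C,N}
    const std::vector<Shape4D> nhwc = { { 64, 28, 28, 1 }, { 32, 28, 28, 1 } }; // {C,W,H,N}
    assert((concat_shape(nchw, 2) == Shape4D{ 28, 28, 96, 1 })); // channel axis under NCHW
    assert((concat_shape(nhwc, 0) == Shape4D{ 96, 28, 28, 1 })); // channel axis under NHWC
    return 0;
}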
+ */ + void set_enabled(bool is_enabled); + /** Enabled parameter accessor + * + * @return True if a backend function is to be created else false + */ + bool is_enabled() const; + /** Concatenation axis parameter accessor + * + * @return Concatenation axis + */ + DataLayoutDimension concatenation_axis() const; + + // Inherited overridden methods: + NodeType type() const override; + bool forward_descriptors() override; + TensorDescriptor configure_output(size_t idx) const override; + void accept(INodeVisitor &v) override; + +private: + unsigned int _total_nodes; + DataLayoutDimension _axis; + bool _is_enabled; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__ */ diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h index aca60283d7..4299be6bb5 100644 --- a/arm_compute/graph/nodes/ConvolutionLayerNode.h +++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h @@ -41,8 +41,10 @@ public: * @param[in] fast_math_hint (Optional) Fast math hint * @param[in] out_quant_info (Optional) Output quantization info */ - ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED, - QuantizationInfo out_quant_info = QuantizationInfo()); + ConvolutionLayerNode(PadStrideInfo info, + ConvolutionMethod method = ConvolutionMethod::Default, + FastMathHint fast_math_hint = FastMathHint::Disabled, + QuantizationInfo out_quant_info = QuantizationInfo()); /** Sets the convolution layer method to use * * @param[in] method Method to use for convolution diff --git a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h b/arm_compute/graph/nodes/DepthConcatenateLayerNode.h deleted file mode 100644 index ffdec709ef..0000000000 --- a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__ -#define __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__ - -#include "arm_compute/graph/INode.h" - -namespace arm_compute -{ -namespace graph -{ -/** Depth Concatenation Layer node */ -class DepthConcatenateLayerNode final : public INode -{ -public: - /** Constructor - * - * @param[in] total_nodes Number of nodes that will get concatenated - */ - DepthConcatenateLayerNode(unsigned int total_nodes); - /** Computes depth concatenations output descriptor - * - * @param[in] input_descriptors Input descriptors - * - * @return Expected output descriptor - */ - static TensorDescriptor compute_output_descriptor(const std::vector &input_descriptors); - /** Disables or not the depth concatenate node - * - * @warning This is used when depth concatenate is performed with sub-tensors, - * where this node is used as a placeholder. - * - * @param[in] is_enabled If true a backend function is created to perform the depth concatenation (involves copying), - * while if false, no function is created and we assume that subtensors are properly set to simulate - * a no copy operation. - */ - void set_enabled(bool is_enabled); - /** Enabled parameter accessor - * - * @return True if a backend function is to be created else false - */ - bool is_enabled() const; - - // Inherited overridden methods: - NodeType type() const override; - bool forward_descriptors() override; - TensorDescriptor configure_output(size_t idx) const override; - void accept(INodeVisitor &v) override; - -private: - unsigned int _total_nodes; - bool _is_enabled; -}; -} // namespace graph -} // namespace arm_compute -#endif /* __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__ */ diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h index df6f456ac9..1a173c5421 100644 --- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h +++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h @@ -39,7 +39,7 @@ public: * @param[in] info Convolution layer attributes * @param[in] method Depthwise convolution method to use */ - DepthwiseConvolutionLayerNode(PadStrideInfo info, DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT); + DepthwiseConvolutionLayerNode(PadStrideInfo info, DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default); /** Sets the depthwise convolution method to use * * @param[in] method Depthwise convolution method to use diff --git a/arm_compute/graph/nodes/Nodes.h b/arm_compute/graph/nodes/Nodes.h index 97aa191916..f2e751e15f 100644 --- a/arm_compute/graph/nodes/Nodes.h +++ b/arm_compute/graph/nodes/Nodes.h @@ -27,10 +27,10 @@ #include "arm_compute/graph/nodes/ActivationLayerNode.h" #include "arm_compute/graph/nodes/BatchNormalizationLayerNode.h" #include "arm_compute/graph/nodes/ChannelShuffleLayerNode.h" +#include "arm_compute/graph/nodes/ConcatenateLayerNode.h" #include "arm_compute/graph/nodes/ConstNode.h" #include "arm_compute/graph/nodes/ConvolutionLayerNode.h" #include "arm_compute/graph/nodes/DeconvolutionLayerNode.h" -#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h" #include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h" #include "arm_compute/graph/nodes/DummyNode.h" #include "arm_compute/graph/nodes/EltwiseLayerNode.h" diff --git a/arm_compute/graph/nodes/NodesFwd.h b/arm_compute/graph/nodes/NodesFwd.h index 05979d796c..a0a9146dc4 100644 --- a/arm_compute/graph/nodes/NodesFwd.h +++ 
b/arm_compute/graph/nodes/NodesFwd.h @@ -33,10 +33,10 @@ class INode; class ActivationLayerNode; class BatchNormalizationLayerNode; class ChannelShuffleLayerNode; +class ConcatenateLayerNode; class ConstNode; class ConvolutionLayerNode; class DeconvolutionLayerNode; -class DepthConcatenateLayerNode; class DepthwiseConvolutionLayerNode; class DummyNode; class EltwiseLayerNode; diff --git a/arm_compute/graph/printers/DotGraphPrinter.h b/arm_compute/graph/printers/DotGraphPrinter.h index 1d355a52ee..d4cf6928e5 100644 --- a/arm_compute/graph/printers/DotGraphPrinter.h +++ b/arm_compute/graph/printers/DotGraphPrinter.h @@ -52,8 +52,8 @@ public: // Inherited methods overridden void visit(ActivationLayerNode &n) override; void visit(BatchNormalizationLayerNode &n) override; + void visit(ConcatenateLayerNode &n) override; void visit(ConvolutionLayerNode &n) override; - void visit(DepthConcatenateLayerNode &n) override; void visit(DepthwiseConvolutionLayerNode &n) override; void visit(EltwiseLayerNode &n) override; void visit(NormalizationLayerNode &n) override; diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 63e7b16128..944a435c3b 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -72,14 +71,20 @@ public: const std::array mean_rgb{ { 122.68f, 116.67f, 104.01f } }; std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor))) + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) // Layer 1 << ConvolutionLayer( 11U, 11U, 96U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), PadStrideInfo(4, 4, 0, 0)) .set_name("conv1") @@ -89,7 +94,7 @@ public: // Layer 2 << ConvolutionLayer( 5U, 5U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"), PadStrideInfo(1, 1, 2, 2), 2) .set_name("conv2") @@ -99,7 +104,7 @@ public: // Layer 3 << ConvolutionLayer( 3U, 3U, 384U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), PadStrideInfo(1, 1, 1, 1)) .set_name("conv3") @@ -107,7 +112,7 @@ public: // Layer 4 << ConvolutionLayer( 3U, 3U, 384U, - 
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) .set_name("conv4") @@ -115,7 +120,7 @@ public: // Layer 5 << ConvolutionLayer( 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) .set_name("conv5") @@ -124,21 +129,21 @@ public: // Layer 6 << FullyConnectedLayer( 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) .set_name("fc6") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") // Layer 7 << FullyConnectedLayer( 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) .set_name("fc7") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") // Layer 8 << FullyConnectedLayer( 1000U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) .set_name("fc8") // Softmax diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp index 4497dbd470..d5bd0c0552 100644 --- a/examples/graph_googlenet.cpp +++ b/examples/graph_googlenet.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -72,13 +71,19 @@ public: const std::array mean_rgb{ { 122.68f, 116.67f, 104.01f } }; std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor))) + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"), PadStrideInfo(2, 2, 3, 3)) << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) @@ -86,33 +91,33 @@ public: << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) << ConvolutionLayer( 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy"), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - graph << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U); - graph << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U); + graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U); + graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - graph << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U); - graph << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U); - graph << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U); - graph << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U); - graph << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U); + graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U); + graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U); + graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U); + graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U); + graph << get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - graph << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U); - graph << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 
384U), std::make_tuple(48U, 128U), 128U); + graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U); + graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))) << FullyConnectedLayer( 1000U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) << SoftmaxLayer() << OutputLayer(get_output_accessor(common_params, 5)); @@ -139,7 +144,7 @@ private: CommonGraphParams common_params; Stream graph; - BranchLayer get_inception_node(const std::string &data_path, std::string &¶m_path, + BranchLayer get_inception_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, unsigned int a_filt, std::tuple b_filters, std::tuple c_filters, @@ -149,7 +154,7 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "1x1_w.npy"), + get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); @@ -157,13 +162,13 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy"), + get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_w.npy"), + get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); @@ -171,13 +176,13 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer( 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy"), + get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 5U, 5U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_w.npy"), + get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "5x5_b.npy"), PadStrideInfo(1, 1, 2, 2)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); @@ -186,7 +191,7 @@ private: i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "pool_proj_w.npy"), + get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout), 
get_weights_accessor(data_path, total_path + "pool_proj_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 67f4e3cacf..c41b0a808e 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -71,12 +70,18 @@ public: // Create a preprocessor object std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor), false)) + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) .set_name("Conv2d_1a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, @@ -89,7 +94,7 @@ public: .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name("Conv2d_2a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, @@ -103,7 +108,7 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) .set_name("Conv2d_2b_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, @@ -119,7 +124,7 @@ public: << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool") << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name("Conv2d_3b_1x1/convolution") << 
BatchNormalizationLayer(get_weights_accessor(data_path, @@ -133,7 +138,7 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, @@ -148,45 +153,45 @@ public: << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool"); - graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), + graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), 32U) .set_name("Mixed_5b/concat"); - graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), + graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), 64U, true) .set_name("Mixed_5c/concat"); - graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), + graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), 64U) .set_name("Mixed_5d/concat"); - graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat"); + graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat"); - graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U), + graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U), std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U) .set_name("Mixed_6b/concat"); - graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U), + graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) .set_name("Mixed_6c/concat"); - graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U), + graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) .set_name("Mixed_6d/concat"); - graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U), + graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U), std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U) .set_name("Mixed_6e/concat"); - graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U), + graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U), std::make_tuple(192U, 192U, 192U, 192U)) .set_name("Mixed_7a/concat"); - graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U), + graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 
384U, 384U), 192U) .set_name("Mixed_7b/concat"); - graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U), + graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 384U, 384U), 192U, true) .set_name("Mixed_7c/concat"); graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool") << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"), + "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) @@ -218,7 +223,7 @@ private: Stream graph; private: - BranchLayer get_inception_node_A(const std::string &data_path, std::string &¶m_path, + BranchLayer get_inception_node_A(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, unsigned int a_filt, std::tuple b_filters, std::tuple c_filters, @@ -239,7 +244,7 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") @@ -255,7 +260,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution") @@ -269,7 +274,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu") << ConvolutionLayer( 5U, 5U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 2, 2)) .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution") @@ -285,7 +290,7 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer( 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") @@ -299,7 +304,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") @@ -313,7 +318,7 @@ private: << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer( 3U, 3U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution") @@ -330,7 +335,7 @@ private: i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") @@ -346,7 +351,7 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - BranchLayer get_inception_node_B(const std::string &data_path, std::string &¶m_path, + BranchLayer get_inception_node_B(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, unsigned int a_filt, std::tuple b_filters) { @@ -354,7 +359,7 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( 3U, 3U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution") @@ -370,7 +375,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") @@ -384,7 +389,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution") @@ -398,7 +403,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu") << ConvolutionLayer( 3U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution") @@ -417,7 +422,7 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } - BranchLayer get_inception_node_C(const std::string &data_path, std::string &¶m_path, + BranchLayer 
get_inception_node_C(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, unsigned int a_filt, std::tuple b_filters, std::tuple c_filters, @@ -427,7 +432,7 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") @@ -443,7 +448,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") @@ -457,7 +462,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") @@ -471,7 +476,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") @@ -487,7 +492,7 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer( 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") @@ -501,7 +506,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 1U, 7U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution") @@ -515,7 +520,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") << ConvolutionLayer( 7U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution") @@ -529,7 +534,7 @@ private: << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") << ConvolutionLayer( 1U, 7U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution") @@ -543,7 +548,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") << ConvolutionLayer( 7U, 1U, std::get<4>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution") @@ -560,7 +565,7 @@ private: i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") @@ -576,15 +581,15 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - BranchLayer get_inception_node_D(const std::string &data_path, std::string &¶m_path, - std::tuple a_filters, + BranchLayer get_inception_node_D(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, + std::tuple a_filters, std::tuple b_filters) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, std::get<0>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") @@ -598,7 +603,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<1>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution") @@ -614,7 +619,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") @@ -628,7 +633,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + 
"/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") @@ -642,7 +647,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") @@ -656,7 +661,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<3>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") @@ -675,7 +680,7 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } - BranchLayer get_inception_node_E(const std::string &data_path, std::string &¶m_path, + BranchLayer get_inception_node_E(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, unsigned int a_filt, std::tuple b_filters, std::tuple c_filters, @@ -693,7 +698,7 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") @@ -709,7 +714,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") @@ -725,7 +730,7 @@ private: SubStream i_b1(i_b); i_b1 << ConvolutionLayer( 3U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 0)) .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") @@ -741,7 +746,7 @@ private: SubStream i_b2(i_b); i_b2 << ConvolutionLayer( 1U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 1)) .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") @@ -760,7 +765,7 @@ private: SubStream i_c(graph); 
i_c << ConvolutionLayer( 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") @@ -774,7 +779,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") @@ -790,7 +795,7 @@ private: SubStream i_c1(i_c); i_c1 << ConvolutionLayer( 3U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 0)) .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") @@ -806,7 +811,7 @@ private: SubStream i_c2(i_c); i_c2 << ConvolutionLayer( 1U, 3U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 1)) .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") @@ -826,7 +831,7 @@ private: i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index 4e405923fc..b61acfcb3f 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -71,13 +70,19 @@ public: // Create a preprocessor object std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor), false)) + << InputLayer(input_descriptor, 
get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -87,7 +92,7 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) // Conv2d_2a_3x3 << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), @@ -97,7 +102,7 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) // Conv2d_2b_3x3 << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), @@ -106,35 +111,35 @@ public: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_mixed_3a(data_path); - graph << get_mixed_4a(data_path); - graph << get_mixed_5a(data_path); + graph << get_mixed_3a(data_path, weights_layout); + graph << get_mixed_4a(data_path, weights_layout); + graph << get_mixed_5a(data_path, weights_layout); // 4 inception A blocks - graph << get_inceptionA_block(data_path, "Mixed_5b"); - graph << get_inceptionA_block(data_path, "Mixed_5c"); - graph << get_inceptionA_block(data_path, "Mixed_5d"); - graph << get_inceptionA_block(data_path, "Mixed_5e"); + graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5b"); + graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5c"); + graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5d"); + graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5e"); // reduction A block - graph << get_reductionA_block(data_path); + graph << get_reductionA_block(data_path, weights_layout); // 7 inception B blocks - graph << get_inceptionB_block(data_path, "Mixed_6b"); - graph << get_inceptionB_block(data_path, "Mixed_6c"); - graph << get_inceptionB_block(data_path, "Mixed_6d"); - graph << get_inceptionB_block(data_path, "Mixed_6e"); - graph << get_inceptionB_block(data_path, "Mixed_6f"); - graph << get_inceptionB_block(data_path, "Mixed_6g"); - graph << get_inceptionB_block(data_path, "Mixed_6h"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6b"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6c"); + graph << 
get_inceptionB_block(data_path, weights_layout, "Mixed_6d"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6e"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6f"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6g"); + graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6h"); // reduction B block - graph << get_reductionB_block(data_path); + graph << get_reductionB_block(data_path, weights_layout); // 3 inception C blocks - graph << get_inceptionC_block(data_path, "Mixed_7b"); - graph << get_inceptionC_block(data_path, "Mixed_7c"); - graph << get_inceptionC_block(data_path, "Mixed_7d"); + graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7b"); + graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7c"); + graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7d"); graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << FlattenLayer() << FullyConnectedLayer( 1001U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) << SoftmaxLayer() << OutputLayer(get_output_accessor(common_params, 5)); @@ -162,7 +167,7 @@ private: Stream graph; private: - BranchLayer get_mixed_3a(const std::string &data_path) + BranchLayer get_mixed_3a(const std::string &data_path, DataLayout weights_layout) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_"; @@ -171,7 +176,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"), @@ -183,13 +188,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b)); } - BranchLayer get_mixed_4a(const std::string &data_path) + BranchLayer get_mixed_4a(const std::string &data_path, DataLayout weights_layout) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_"; SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -198,7 +203,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), 
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -209,7 +214,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -218,7 +223,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(7U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), @@ -227,7 +232,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(1U, 7U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), @@ -236,7 +241,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -248,13 +253,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b)); } - BranchLayer get_mixed_5a(const std::string &data_path) + BranchLayer get_mixed_5a(const std::string &data_path, DataLayout weights_layout) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_"; SubStream i_a(graph); i_a << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -269,13 +274,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b)); } - BranchLayer 
get_inceptionA_block(const std::string &data_path, std::string &¶m_path) + BranchLayer get_inceptionA_block(const std::string &data_path, DataLayout weights_layout, std::string &¶m_path) { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -286,7 +291,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -295,7 +300,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), @@ -306,7 +311,7 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -315,7 +320,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), @@ -324,7 +329,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) << 
BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), @@ -336,7 +341,7 @@ private: SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), @@ -348,13 +353,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - BranchLayer get_reductionA_block(const std::string &data_path) + BranchLayer get_reductionA_block(const std::string &data_path, DataLayout weights_layout) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_"; SubStream i_a(graph); i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -365,7 +370,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -374,7 +379,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 1)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), @@ -383,7 +388,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path 
+ "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -398,13 +403,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } - BranchLayer get_inceptionB_block(const std::string &data_path, std::string &¶m_path) + BranchLayer get_inceptionB_block(const std::string &data_path, DataLayout weights_layout, std::string &¶m_path) { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -415,7 +420,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -424,7 +429,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), @@ -433,7 +438,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(1U, 7U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), @@ -444,7 +449,7 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -453,7 +458,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(1U, 7U, 192U, - 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), @@ -462,7 +467,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), @@ -471,7 +476,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(1U, 7U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), @@ -480,7 +485,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), @@ -492,7 +497,7 @@ private: SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), @@ -504,13 +509,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - BranchLayer get_reductionB_block(const std::string &data_path) + BranchLayer get_reductionB_block(const std::string &data_path, DataLayout weights_layout) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_"; SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + 
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -519,7 +524,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -530,7 +535,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -539,7 +544,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 3, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), @@ -548,7 +553,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(1U, 7U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 3)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), @@ -557,7 +562,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), @@ -572,13 +577,13 @@ private: return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), 
std::move(i_b), std::move(i_c)); } - BranchLayer get_inceptionC_block(const std::string &data_path, std::string &¶m_path) + BranchLayer get_inceptionC_block(const std::string &data_path, DataLayout weights_layout, std::string &¶m_path) { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), @@ -590,7 +595,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer( @@ -604,7 +609,7 @@ private: SubStream i_b1(i_b); i_b1 << ConvolutionLayer( 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 0)) << BatchNormalizationLayer( @@ -618,7 +623,7 @@ private: SubStream i_b2(i_b); i_b2 << ConvolutionLayer( 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 1)) << BatchNormalizationLayer( @@ -635,7 +640,7 @@ private: SubStream i_c(graph); i_c << ConvolutionLayer( 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer( @@ -647,7 +652,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 1U, 3U, 448U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 1)) << BatchNormalizationLayer( @@ -659,7 +664,7 @@ private: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 1U, 512U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 0)) << BatchNormalizationLayer( @@ -673,7 +678,7 @@ private: SubStream i_c1(i_c); i_c1 << ConvolutionLayer( 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 1, 0)) << BatchNormalizationLayer( @@ -687,7 +692,7 @@ private: SubStream i_c2(i_c); i_c2 << ConvolutionLayer( 
1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 1)) << BatchNormalizationLayer( @@ -704,7 +709,7 @@ private: SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index a747b3cd11..d182844530 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -85,7 +85,7 @@ public: // Set graph hints graph << common_params.target - << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method + << DepthwiseConvolutionMethod::Optimized3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method << common_params.fast_math_hint; // Create core graph diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index 58f36f6ae4..0ad719a2ca 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -72,13 +71,20 @@ public: const std::array mean_rgb{ { 122.68f, 116.67f, 104.01f } }; std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb, false /* Do not convert to BGR */); + + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) << ConvolutionLayer( 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 3, 3)) .set_name("conv1/convolution") @@ -92,15 +98,15 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, 
DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); - add_residual_block(data_path, "block1", 64, 3, 2); - add_residual_block(data_path, "block2", 128, 4, 2); - add_residual_block(data_path, "block3", 256, 6, 2); - add_residual_block(data_path, "block4", 512, 3, 1); + add_residual_block(data_path, "block1", weights_layout, 64, 3, 2); + add_residual_block(data_path, "block2", weights_layout, 128, 4, 2); + add_residual_block(data_path, "block3", weights_layout, 256, 6, 2); + add_residual_block(data_path, "block4", weights_layout, 512, 3, 1); graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool5") << ConvolutionLayer( 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), PadStrideInfo(1, 1, 0, 0)) .set_name("logits/convolution") @@ -129,7 +135,8 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, const std::string &name, unsigned int base_depth, unsigned int num_units, unsigned int stride) + void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout, + unsigned int base_depth, unsigned int num_units, unsigned int stride) { for(unsigned int i = 0; i < num_units; ++i) { @@ -151,7 +158,7 @@ private: SubStream right(graph); right << ConvolutionLayer( 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(unit_name + "conv1/convolution") @@ -166,7 +173,7 @@ private: << ConvolutionLayer( 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(middle_stride, middle_stride, 1, 1)) .set_name(unit_name + "conv2/convolution") @@ -181,7 +188,7 @@ private: << ConvolutionLayer( 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(unit_name + "conv3/convolution") @@ -198,7 +205,7 @@ private: SubStream left(graph); left << ConvolutionLayer( 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "shortcut_weights.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(1, 1, 0, 0)) .set_name(unit_name + "shortcut/convolution") diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp index c0e9b9f22a..e7ef013f17 100644 --- a/examples/graph_resnext50.cpp +++ b/examples/graph_resnext50.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -68,26 +67,32 @@ public: // Get trainable parameters data path std::string data_path = common_params.data_path; + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), 
DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params)) + << InputLayer(input_descriptor, get_input_accessor(common_params)) << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"), get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy")) .set_name("bn_data/Scale") << ConvolutionLayer( 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) .set_name("conv0/Convolution") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0"); - add_residual_block(data_path, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); - add_residual_block(data_path, 512, 2, 4, 2); - add_residual_block(data_path, 1024, 3, 6, 2); - add_residual_block(data_path, 2048, 4, 3, 2); + add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); + add_residual_block(data_path, weights_layout, 512, 2, 4, 2); + add_residual_block(data_path, weights_layout, 1024, 3, 6, 2); + add_residual_block(data_path, weights_layout, 2048, 4, 3, 2); graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool1") << FlattenLayer().set_name("predictions/Reshape") @@ -116,7 +121,8 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) + void add_residual_block(const std::string &data_path, DataLayout weights_layout, + unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) { for(unsigned int i = 0; i < num_units; ++i) { @@ -137,7 +143,7 @@ private: SubStream right(graph); right << ConvolutionLayer( 1U, 1U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) .set_name(unit_name + "conv1/convolution") @@ -145,7 +151,7 @@ private: << ConvolutionLayer( 3U, 3U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), std::unique_ptr(nullptr), pad_grouped_conv, 32) .set_name(unit_name + "conv2/convolution") @@ -156,7 +162,7 @@ private: << ConvolutionLayer( 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), PadStrideInfo(1, 1, 0, 0)) .set_name(unit_name + 
"conv3/convolution"); @@ -166,7 +172,7 @@ private: { left << ConvolutionLayer( 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "sc_weights.npy"), + get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout), std::unique_ptr(nullptr), PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) .set_name(unit_name + "sc/convolution") diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index 9439ab4343..b539a9bc34 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -72,78 +71,84 @@ public: const std::array mean_rgb{ { 122.68f, 116.67f, 104.01f } }; std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor))) + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( 7U, 7U, 96U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"), PadStrideInfo(2, 2, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire2", 64U, 64U); + graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U); graph << ConvolutionLayer( 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire3", 64U, 64U); + graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U); graph << ConvolutionLayer( 1U, 1U, 32U, - get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire4", 128U, 128U); + graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire5", 128U, 128U); + graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U); graph << ConvolutionLayer( 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire6", 192U, 192U); + graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U); graph << ConvolutionLayer( 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire7", 192U, 192U); + graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U); graph << ConvolutionLayer( 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire8", 256U, 256U); + graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire9", 256U, 256U); + graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U); graph << ConvolutionLayer( 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) @@ -174,13 +179,14 @@ private: CommonGraphParams common_params; Stream graph; - BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, unsigned int expand1_filt, unsigned int expand3_filt) + BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, + unsigned int expand1_filt, unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy"), + get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); @@ -188,7 +194,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy"), + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index ba7ee774a7..c0b5ff212d 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -60,7 +60,6 @@ public: // Checks ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!"); - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!"); // Print parameter values std::cout << common_params << std::endl; @@ -72,80 +71,86 @@ public: const std::array mean_rgb{ { 122.68f, 116.67f, 104.01f } }; std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); + // Create input descriptor + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + + // Set weights trained layout + const DataLayout weights_layout = DataLayout::NCHW; + graph << common_params.target << common_params.fast_math_hint - << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), common_params.data_type), - get_input_accessor(common_params, std::move(preprocessor))) - << ConvolutionMethod::DIRECT + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + << ConvolutionMethod::Direct << ConvolutionLayer( 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy"), + get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"), PadStrideInfo(2, 2, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) - << ConvolutionMethod::DEFAULT + << ConvolutionMethod::Default << ConvolutionLayer( 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire2", 64U, 64U); + graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U); graph << ConvolutionLayer( 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire3", 64U, 64U); + graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire4", 128U, 128U); + graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U); graph << ConvolutionLayer( 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire5", 128U, 128U); + graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U); graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire6", 192U, 192U); + graph << 
get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U); graph << ConvolutionLayer( 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire7", 192U, 192U); + graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U); graph << ConvolutionLayer( 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire8", 256U, 256U); + graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U); graph << ConvolutionLayer( 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - graph << get_expand_fire_node(data_path, "fire9", 256U, 256U); + graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U); graph << ConvolutionLayer( 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy"), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) @@ -176,13 +181,14 @@ private: CommonGraphParams common_params; Stream graph; - BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, unsigned int expand1_filt, unsigned int expand3_filt) + BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, + unsigned int expand1_filt, unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy"), + get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); @@ -190,7 +196,7 @@ private: SubStream i_b(graph); i_b << ConvolutionLayer( 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy"), + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index c38a78ce3e..080835348d 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -549,10 +549,10 @@ __kernel void pooling_layer_MxN_nhwc( for(int y = 0; y < POOL_SIZE_Y; ++y) { - int y1 = select(y, PAD_Y - idx_height, y + idx_height < PAD_Y || y + idx_height > MAX_HEIGHT); + int y1 = select(y, PAD_Y - idx_height, y + idx_height - PAD_Y < 0 || y + idx_height - PAD_Y >= MAX_HEIGHT); for(int x = 0; x < POOL_SIZE_X; ++x) { - int x1 = select(x, PAD_X - idx_width - 1, x + idx_width < PAD_X || x + idx_width > MAX_WIDTH); + int x1 = select(x, PAD_X - idx_width - 1, x + idx_width - PAD_X < 0 || x + idx_width - PAD_X >= MAX_WIDTH); x1 = select(x1, PAD_X - idx_width - 1, y != y1); VEC_DATA_TYPE(DATA_TYPE, 8) diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index b1290b8edd..a09129bba6 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -288,7 +288,6 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN); - ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_layout() != DataLayout::NCHW, "Special case Im2Col output layout is NCHW"); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation)); _input = input; diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index df01eab240..edc9e9d58c 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -42,6 +42,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC && norm_info.type() == NormType::IN_MAP_2D, + "Only Cross-map and 1D In-map normalization is supported for NHWC layout"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); // Checks performed when output is configured @@ -59,14 +61,15 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen // Output tensor auto initialization if not yet initialized auto_init_if_empty(*output, *input->clone()); - const unsigned int norm_size = norm_info.norm_size(); - bool is_in_map = norm_info.is_in_map(); + const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info); + const unsigned int norm_size = norm_info.norm_size(); + bool is_norm_across_width = norm_idx == 0; - const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0; + const unsigned int border_width = is_norm_across_width ? std::min(norm_size / 2, 3U) : 0; const BorderSize border_size = BorderSize(0, border_width); const unsigned int num_elems_processed_per_iteration = 4; - const unsigned int num_elems_read_per_iteration = is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2)) : num_elems_processed_per_iteration; + const unsigned int num_elems_read_per_iteration = is_norm_across_width ? 
(num_elems_processed_per_iteration + 2 * (norm_size / 2)) : num_elems_processed_per_iteration; Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); @@ -84,7 +87,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLNormalizationLayerKernel::CLNormalizationLayerKernel() - : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false) + : _input(nullptr), _output(nullptr), _border_size(0), _is_norm_across_width(false) { } @@ -106,8 +109,9 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou _input = input; _output = output; - _is_in_map = norm_info.is_in_map(); - const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0; + const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info); + _is_norm_across_width = norm_idx == 0; + const unsigned int border_width = _is_norm_across_width ? std::min(norm_info.norm_size() / 2, 3U) : 0; _border_size = BorderSize(0, border_width); const unsigned int num_elems_processed_per_iteration = 4; @@ -125,7 +129,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); // Create kernel - std::string kernel_name = _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map"; + std::string kernel_name = _is_norm_across_width ? "normalization_layer_in_map" : "normalization_layer_cross_map"; _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Configure kernel window @@ -159,7 +163,7 @@ void CLNormalizationLayerKernel::run(const Window &window, cl::CommandQueue &que ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - const int collapsed_dimension = _is_in_map ? Window::DimZ : 4; + const int collapsed_dimension = _is_norm_across_width ? 
Window::DimZ : 4; Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension); Window slice = window_collapsed.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 246ab68130..d5ea092c78 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -154,7 +154,9 @@ std::tuple validate_and_configure_window(ITenso num_elems_processed_per_iteration = 8; win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowRectangle input_access(input, 0, -pool_pad_left, num_elems_processed_per_iteration, pool_size_x); + AccessWindowStatic input_access(input, + 0, -1, + ceil_to_multiple(input->dimension(0), num_elems_processed_per_iteration), input->dimension(1)); AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); window_changed = update_window_and_padding(win, input_access, output_access); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index cb1996f33e..15e8298e2d 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -43,6 +43,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC && norm_info.type() == NormType::IN_MAP_2D, + "Only Cross-map and 1D In-map normalization is supported for NHWC layout"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); @@ -61,8 +63,9 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen { unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); + const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info); const unsigned int num_rows = (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1; - const unsigned int border_width = (norm_info.is_cross_map()) ? 0 : std::min(norm_info.norm_size() / 2, 3U); + const unsigned int border_width = (norm_idx == 2) ? 0 : std::min(norm_info.norm_size() / 2, 3U); BorderSize border_size = BorderSize(0, border_width); bool window_changed = false; @@ -107,7 +110,8 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor * // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), input_squared->info(), output->info(), norm_info)); - const unsigned int border_width = (norm_info.is_cross_map()) ? 0 : std::min(norm_info.norm_size() / 2, 3U); + const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info); + const unsigned int border_width = (norm_idx == 2) ? 
0 : std::min(norm_info.norm_size() / 2, 3U); _input = input; _input_squared = input_squared; @@ -119,16 +123,21 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor * { case DataType::F32: { - switch(norm_info.type()) + switch(norm_idx) { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerKernel::normalize_float; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerKernel::normalize_float; + case 0: + { + if(norm_info.type() == NormType::IN_MAP_2D) + { + _func = &NENormalizationLayerKernel::normalize_float; + } + else + { + _func = &NENormalizationLayerKernel::normalize_float; + } break; - case NormType::CROSS_MAP: + } + case 2: _func = &NENormalizationLayerKernel::normalize_float; break; default: @@ -138,16 +147,21 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor * } case DataType::F16: { - switch(norm_info.type()) + switch(norm_idx) { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerKernel::normalize_float; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerKernel::normalize_float; + case 0: + { + if(norm_info.type() == NormType::IN_MAP_2D) + { + _func = &NENormalizationLayerKernel::normalize_float; + } + else + { + _func = &NENormalizationLayerKernel::normalize_float; + } break; - case NormType::CROSS_MAP: + } + case 2: _func = &NENormalizationLayerKernel::normalize_float; break; default: diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index d26039ec35..b3721719d9 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -88,10 +88,14 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPai bool has_bias = (bias != EmptyNodeID); // Split input - NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, 2); + const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); + const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL); + NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); // Split weights - NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, 3); + const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]); + const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES); + NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx); // Split bias NodeID bias_split = EmptyNodeID; @@ -122,7 +126,7 @@ } // Depth concatenate output - return GraphBuilder::add_depth_concatenate_node(g, params, convolution_outputs); + return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL); } } // namespace @@ -329,11 +333,11 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx return deconv_nid; } -NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std::vector inputs) +NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector inputs, DataLayoutDimension axis) { ARM_COMPUTE_ERROR_ON(inputs.size() == 0); - NodeID nid = g.add_node(inputs.size()); + NodeID nid = g.add_node(inputs.size(), axis); unsigned int i = 0; for(const auto &input : inputs) @@ -508,9 +512,9 @@ NodeID 
GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdx NodeIdxPair add_const_nidxp = { add_const_nid, 0 }; // Create node and connect - NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::MUL); + NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul); NodeIdxPair mulnode_nidxp = { mul_node, 0 }; - NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::ADD); + NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add); return add_node; } diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 4d6734846a..57871487ef 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -89,8 +89,8 @@ std::unique_ptr CLFunctionFactory::create(INode *node, GraphContext & return detail::create_convolution_layer(*polymorphic_downcast(node), ctx); case NodeType::DeconvolutionLayer: return detail::create_deconvolution_layer(*polymorphic_downcast(node), ctx); - case NodeType::DepthConcatenateLayer: - return detail::create_depth_concatenate_layer(*polymorphic_downcast(node)); + case NodeType::ConcatenateLayer: + return detail::create_concatenate_layer(*polymorphic_downcast(node)); case NodeType::DepthwiseConvolutionLayer: return detail::create_depthwise_convolution_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index e6bd5a5f02..f72513c87c 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -68,6 +68,42 @@ struct GCEltwiseFunctions namespace detail { +// Specialize functions +template <> +std::unique_ptr create_concatenate_layer(ConcatenateLayerNode &node) +{ + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl); + ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); + + // Return nullptr if depth concatenate is switched off + if(!node.is_enabled()) + { + return nullptr; + } + + // Extract IO and info + std::vector inputs; + for(unsigned int i = 0; i < node.num_inputs(); ++i) + { + inputs.push_back(get_backing_tensor(node.input(i))); + } + typename GCTargetInfo::TensorType *output = get_backing_tensor(node.output(0)); + + // Create and configure function + auto func = support::cpp14::make_unique(); + func->configure(inputs, output); + + // Log info + ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type() + << " Target " << GCTargetInfo::TargetType + << " Data Type: " << output->info()->data_type() + << " Shape: " << output->info()->tensor_shape() + << " Num Inputs: " << inputs.size() + << std::endl); + + return std::move(func); +} + template <> std::unique_ptr create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx) { @@ -92,7 +128,7 @@ std::unique_ptr create_convolution_layer func; std::string func_name; - if(conv_algorithm == ConvolutionMethod::DIRECT) + if(conv_algorithm == ConvolutionMethod::Direct) { std::tie(func, func_name) = create_named_function( std::string("DirectConvolutionLayer"), @@ -139,7 +175,7 @@ std::unique_ptr create_depthwise_convolution_layer func; std::string func_name; - if(dwc_algorithm == 
DepthwiseConvolutionMethod::Optimized3x3) { std::tie(func, func_name) = create_named_function( std::string("DepthwiseConvolutionLayer3x3"), @@ -183,17 +219,17 @@ std::unique_ptr create_eltwise_layer func = nullptr; std::string func_name; - if(eltwise_op == EltwiseOperation::ADD) + if(eltwise_op == EltwiseOperation::Add) { std::tie(func, func_name) = create_named_function( std::string("GCArithmeticAddition"), input1, input2, output, convert_policy); } - else if(eltwise_op == EltwiseOperation::SUB) + else if(eltwise_op == EltwiseOperation::Sub) { ARM_COMPUTE_ERROR("Arithmetic subtraction is not supported in GLES backend"); } - else if(eltwise_op == EltwiseOperation::MUL) + else if(eltwise_op == EltwiseOperation::Mul) { std::tie(func, func_name) = create_named_function( std::string("PixelWiseMultiplication"), @@ -232,8 +268,8 @@ std::unique_ptr GCFunctionFactory::create(INode *node, GraphContext & return detail::create_batch_normalization_layer(*polymorphic_downcast(node)); case NodeType::ConvolutionLayer: return detail::create_convolution_layer(*polymorphic_downcast(node), ctx); - case NodeType::DepthConcatenateLayer: - return detail::create_depth_concatenate_layer(*polymorphic_downcast(node)); + case NodeType::ConcatenateLayer: + return detail::create_concatenate_layer(*polymorphic_downcast(node)); case NodeType::DepthwiseConvolutionLayer: return detail::create_depthwise_convolution_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp index 4bef89329a..8118a7c476 100644 --- a/src/graph/backends/GLES/GCNodeValidator.cpp +++ b/src/graph/backends/GLES/GCNodeValidator.cpp @@ -58,7 +58,7 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node) // TODO (geopin01) : Switch when validation is implemented // Validate function ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution"); - node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::OPTIMIZED_3x3); + node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Optimized3x3); return Status{}; } @@ -80,14 +80,14 @@ Status validate_convolution_layer(ConvolutionLayerNode &node) const ConvolutionMethod conv_algorithm = node.convolution_method(); // Validate function - if(conv_algorithm == ConvolutionMethod::DIRECT) + if(conv_algorithm == ConvolutionMethod::Direct) { bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y(); bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5); bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2); if(!(is_square && is_direct && is_correct_stride)) { - node.set_convolution_method(ConvolutionMethod::DEFAULT); + node.set_convolution_method(ConvolutionMethod::Default); } } diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 3b7417da3f..6c912a02f1 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -102,7 +102,7 @@ std::unique_ptr create_convolution_layer mm = get_memory_manager(ctx, Target::NEON); std::unique_ptr func; std::string func_name; - if(conv_algorithm == ConvolutionMethod::DIRECT) + if(conv_algorithm == ConvolutionMethod::Direct) { std::tie(func, func_name) = create_named_memory_managed_function( 
std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info); @@ -112,7 +112,7 @@ std::unique_ptr create_convolution_layer( std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info); } - else if(conv_algorithm == ConvolutionMethod::WINOGRAD) + else if(conv_algorithm == ConvolutionMethod::Winograd) { std::tie(func, func_name) = create_named_memory_managed_function( std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info); @@ -183,8 +183,8 @@ std::unique_ptr NEFunctionFactory::create(INode *node, GraphContext & return detail::create_convolution_layer(*polymorphic_downcast(node), ctx); case NodeType::DeconvolutionLayer: return detail::create_deconvolution_layer(*polymorphic_downcast(node), ctx); - case NodeType::DepthConcatenateLayer: - return detail::create_depth_concatenate_layer(*polymorphic_downcast(node)); + case NodeType::ConcatenateLayer: + return detail::create_concatenate_layer(*polymorphic_downcast(node)); case NodeType::DepthwiseConvolutionLayer: return detail::create_depthwise_convolution_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp index c56f4c5106..241c07b367 100644 --- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp +++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp @@ -25,8 +25,9 @@ #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/backends/BackendRegistry.h" -#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h" +#include "arm_compute/graph/nodes/ConcatenateLayerNode.h" #include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/Iterable.h" @@ -45,11 +46,18 @@ void DepthConcatSubTensorMutator::mutate(Graph &g) // Should be in reverse order of execution for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes())) { - if(node && node->type() == NodeType::DepthConcatenateLayer && node->output(0) != nullptr) + if(node && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) { // Get output tensor auto output_tensor = node->output(0); + // Check concatenation axis (Sub-tensor optimization is support for concatenation axis >=2) + auto *concat_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); + if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2) + { + continue; + } + // Check that all tensor have the same target and valid inputs bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(), [&](const EdgeID & eid) @@ -76,7 +84,7 @@ void DepthConcatSubTensorMutator::mutate(Graph &g) depth += input_shape.z(); } - auto *dc_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); + auto *dc_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); dc_node->set_enabled(false); } } diff --git a/src/graph/nodes/ConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp new file mode 100644 index 0000000000..ade3f6e1a9 --- /dev/null +++ b/src/graph/nodes/ConcatenateLayerNode.cpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/graph/nodes/ConcatenateLayerNode.h" + +#include "arm_compute/core/Utils.h" +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/INodeVisitor.h" +#include "arm_compute/graph/Utils.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +namespace arm_compute +{ +namespace graph +{ +ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis) + : _total_nodes(total_nodes), _axis(axis), _is_enabled(true) +{ + _input_edges.resize(_total_nodes, EmptyEdgeID); + _outputs.resize(1, NullTensorID); +} + +void ConcatenateLayerNode::set_enabled(bool is_enabled) +{ + _is_enabled = is_enabled; +} + +bool ConcatenateLayerNode::is_enabled() const +{ + return _is_enabled; +} + +DataLayoutDimension ConcatenateLayerNode::concatenation_axis() const +{ + return _axis; +} + +TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vector &input_descriptors, + DataLayoutDimension axis) +{ + ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0); + + TensorDescriptor output_descriptor = input_descriptors[0]; + const int axis_idx = get_dimension_idx(output_descriptor, axis); + + // Extract shapes + std::vector shapes; + for(auto &input_descriptor : input_descriptors) + { + shapes.emplace_back(&input_descriptor.shape); + } + + // Calculate output shape + if(axis_idx == 0) + { + output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(shapes); + } + else if(axis_idx == 2) + { + output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_depth_concatenate_shape(shapes); + } + else + { + ARM_COMPUTE_ERROR("Unsupported concatenation axis!"); + } + + return output_descriptor; +} + +bool ConcatenateLayerNode::forward_descriptors() +{ + if(_outputs[0] != NullTensorID) + { + Tensor *dst = output(0); + ARM_COMPUTE_ERROR_ON(dst == nullptr); + dst->desc() = configure_output(0); + return true; + } + return false; +} + +TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const +{ + ARM_COMPUTE_UNUSED(idx); + ARM_COMPUTE_ERROR_ON(idx >= _outputs.size()); + + // Check if all input tensors are set + bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges), [](const EdgeID & eid) + { + return eid != EmptyEdgeID; + }); + + TensorDescriptor output_info = {}; + + if(are_all_inputs_set) + { + std::vector inputs_descriptors; + 
for(unsigned int i = 0; i < _input_edges.size(); ++i) + { + const Tensor *t = _graph->tensor(input_id(i)); + ARM_COMPUTE_ERROR_ON(t == nullptr); + inputs_descriptors.push_back(t->desc()); + } + output_info = compute_output_descriptor(inputs_descriptors, _axis); + } + + return output_info; +} + +NodeType ConcatenateLayerNode::type() const +{ + return NodeType::ConcatenateLayer; +} + +void ConcatenateLayerNode::accept(INodeVisitor &v) +{ + v.visit(*this); +} +} // namespace graph +} // namespace arm_compute \ No newline at end of file diff --git a/src/graph/nodes/DepthConcatenateLayerNode.cpp b/src/graph/nodes/DepthConcatenateLayerNode.cpp deleted file mode 100644 index 08cccc1ff1..0000000000 --- a/src/graph/nodes/DepthConcatenateLayerNode.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h" - -#include "arm_compute/core/Utils.h" -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/INodeVisitor.h" - -namespace arm_compute -{ -namespace graph -{ -DepthConcatenateLayerNode::DepthConcatenateLayerNode(unsigned int total_nodes) - : _total_nodes(total_nodes), _is_enabled(true) -{ - _input_edges.resize(_total_nodes, EmptyEdgeID); - _outputs.resize(1, NullTensorID); -} - -void DepthConcatenateLayerNode::set_enabled(bool is_enabled) -{ - _is_enabled = is_enabled; -} - -bool DepthConcatenateLayerNode::is_enabled() const -{ - return _is_enabled; -} - -TensorDescriptor DepthConcatenateLayerNode::compute_output_descriptor(const std::vector &input_descriptors) -{ - ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0); - - TensorDescriptor output_descriptor = input_descriptors[0]; - - size_t max_x = 0; - size_t max_y = 0; - size_t depth = 0; - - for(const auto &input_descriptor : input_descriptors) - { - max_x = std::max(input_descriptor.shape.x(), max_x); - max_y = std::max(input_descriptor.shape.y(), max_y); - depth += input_descriptor.shape.z(); - } - - output_descriptor.shape.set(0, max_x); - output_descriptor.shape.set(1, max_y); - output_descriptor.shape.set(2, depth); - - return output_descriptor; -} - -bool DepthConcatenateLayerNode::forward_descriptors() -{ - if(_outputs[0] != NullTensorID) - { - Tensor *dst = output(0); - ARM_COMPUTE_ERROR_ON(dst == nullptr); - dst->desc() = configure_output(0); - return true; - } - return false; -} - -TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const -{ - ARM_COMPUTE_UNUSED(idx); - ARM_COMPUTE_ERROR_ON(idx >= _outputs.size()); - - // Check if all input tensors are set - bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges), [](const EdgeID & eid) - { - return eid != EmptyEdgeID; - }); - - TensorDescriptor output_info = {}; - - if(are_all_inputs_set) - { - std::vector inputs_descriptors; - for(unsigned int i = 0; i < _input_edges.size(); ++i) - { - const Tensor *t = _graph->tensor(input_id(i)); - ARM_COMPUTE_ERROR_ON(t == nullptr); - inputs_descriptors.push_back(t->desc()); - } - output_info = compute_output_descriptor(inputs_descriptors); - } - - return output_info; -} - -NodeType DepthConcatenateLayerNode::type() const -{ - return NodeType::DepthConcatenateLayer; -} - -void DepthConcatenateLayerNode::accept(INodeVisitor &v) -{ - v.visit(*this); -} -} // namespace graph -} // namespace arm_compute \ No newline at end of file diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp index 61cf42356f..ef156ea252 100644 --- a/src/graph/printers/DotGraphPrinter.cpp +++ b/src/graph/printers/DotGraphPrinter.cpp @@ -47,17 +47,19 @@ void DotGraphVisitor::visit(BatchNormalizationLayerNode &n) _info = ss.str(); } -void DotGraphVisitor::visit(ConvolutionLayerNode &n) +void DotGraphVisitor::visit(ConcatenateLayerNode &n) { std::stringstream ss; - ss << n.convolution_method(); + ss << "Enabled: " << n.is_enabled(); + ss << R"( \n )"; + ss << "Axis: " << n.concatenation_axis(); _info = ss.str(); } -void DotGraphVisitor::visit(DepthConcatenateLayerNode &n) +void DotGraphVisitor::visit(ConvolutionLayerNode &n) { std::stringstream ss; - ss << "Enabled: " << n.is_enabled(); + ss << n.convolution_method(); _info = ss.str(); } diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp index 7ac7bbce2b..2e5d787448 100644 --- a/utils/CommonGraphOptions.cpp +++ 
b/utils/CommonGraphOptions.cpp @@ -84,7 +84,7 @@ namespace utils os << "Data layout : " << common_params.data_layout << std::endl; os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl; os << "Tuner file : " << common_params.tuner_file << std::endl; - os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::ENABLED ? true_str : false_str) << std::endl; + os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl; if(!common_params.data_path.empty()) { os << "Data path : " << common_params.data_path << std::endl; @@ -168,7 +168,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser) CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options) { - FastMathHint fast_math_hint_value = options.fast_math_hint->value() ? FastMathHint::ENABLED : FastMathHint::DISABLED; + FastMathHint fast_math_hint_value = options.fast_math_hint->value() ? FastMathHint::Enabled : FastMathHint::Disabled; auto validation_range = parse_validation_range(options.validation_range->value()); CommonGraphParams common_params; @@ -178,7 +178,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options) common_params.data_type = options.data_type->value(); common_params.data_layout = options.data_layout->value(); common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false; - common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::DISABLED; + common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled; common_params.data_path = options.data_path->value(); common_params.image = options.image->value(); common_params.labels = options.labels->value(); diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h index ce4a2c9dd1..23c3cc7c30 100644 --- a/utils/CommonGraphOptions.h +++ b/utils/CommonGraphOptions.h @@ -93,7 +93,7 @@ struct CommonGraphParams arm_compute::DataType data_type{ DataType::F32 }; arm_compute::DataLayout data_layout{ DataLayout::NCHW }; bool enable_tuner{ false }; - arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::DISABLED }; + arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled }; std::string data_path{}; std::string image{}; std::string labels{}; diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index eb03ff9b96..8bfdba9725 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -475,6 +475,35 @@ inline std::string to_string(const arm_compute::DataLayout &data_layout) return str.str(); } +/** Formatted output of the DataLayoutDimension type. + * + * @param[out] os Output stream. + * @param[in] data_layout_dim Data layout dimension to print. + * + * @return Modified output stream. + */ +inline ::std::ostream &operator<<(::std::ostream &os, const DataLayoutDimension &data_layout_dim) +{ + switch(data_layout_dim) + { + case DataLayoutDimension::WIDTH: + os << "WIDTH"; + break; + case DataLayoutDimension::HEIGHT: + os << "HEIGHT"; + break; + case DataLayoutDimension::CHANNEL: + os << "CHANNEL"; + break; + case DataLayoutDimension::BATCHES: + os << "BATCHES"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + return os; +} + /** Formatted output of the DataType type. * * @param[out] os Output stream. -- cgit v1.2.1
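The example updates in this patch all repeat one layout-aware pattern: the input shape is written in NCHW order, permuted to the layout requested on the command line, the resulting descriptor is tagged with that layout, and every weights accessor is told the trained layout (NCHW) so the weights can be permuted when the graph runs in NHWC. The condensed sketch below shows that pattern in isolation; it is illustrative only and not taken from the patch. It reuses the includes, helper functions (CommonGraphParams, permute_shape, get_input_accessor, get_weights_accessor) and resnext50 file names already shown in the examples above, and the function name add_input_and_first_conv is invented for the sketch.

// Condensed, illustrative sketch of the layout-aware example pattern (not part of the patch).
// Helpers and file names are the ones the graph examples above already use.
#include "arm_compute/graph.h"
#include "utils/CommonGraphOptions.h"
#include "utils/GraphUtils.h"
#include "utils/Utils.h"

using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

void add_input_and_first_conv(Stream &graph, const CommonGraphParams &common_params, const std::string &data_path)
{
    // Author the shape in NCHW and permute it to the requested data layout.
    const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
    TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);

    // Weights stay in the layout they were trained in (NCHW for these models).
    const DataLayout weights_layout = DataLayout::NCHW;

    graph << common_params.target
          << common_params.fast_math_hint
          << InputLayer(input_descriptor, get_input_accessor(common_params))
          << ConvolutionLayer(7U, 7U, 64U,
                              get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout),
                              get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"),
                              PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR));
}

Passing weights_layout to the accessor is what allows the same trained .npy files to be reused unchanged when the graph itself executes in NHWC.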