author    Georgios Pinitas <georgios.pinitas@arm.com>  2018-07-20 13:23:44 +0100
committer Anthony Barbier <anthony.barbier@arm.com>   2018-11-02 16:54:54 +0000
commit    e2220551b7a64b929650ba9a60529c31e70c13c5 (patch)
tree      5d609887f15b4392cdade7bb388710ceafc62260
parent    eff8d95991205e874091576e2d225f63246dd0bb (diff)
COMPMID-1367: Enable NHWC in graph examples
Change-Id: Iabc54a3a1bdcd46a9a921cda39c7c85fef672b72
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141449
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h | 2
-rw-r--r-- arm_compute/core/Helpers.h | 60
-rw-r--r-- arm_compute/core/utils/misc/ShapeCalculator.h | 30
-rw-r--r-- arm_compute/graph/GraphBuilder.h | 9
-rw-r--r-- arm_compute/graph/INodeVisitor.h | 16
-rw-r--r-- arm_compute/graph/TypePrinter.h | 44
-rw-r--r-- arm_compute/graph/Types.h | 24
-rw-r--r-- arm_compute/graph/backends/FunctionHelpers.h | 116
-rw-r--r-- arm_compute/graph/backends/ValidateHelpers.h | 16
-rw-r--r-- arm_compute/graph/frontend/Layers.h | 6
-rw-r--r-- arm_compute/graph/frontend/Types.h | 6
-rw-r--r-- arm_compute/graph/nodes/ConcatenateLayerNode.h (renamed from arm_compute/graph/nodes/DepthConcatenateLayerNode.h) | 37
-rw-r--r-- arm_compute/graph/nodes/ConvolutionLayerNode.h | 6
-rw-r--r-- arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h | 2
-rw-r--r-- arm_compute/graph/nodes/Nodes.h | 2
-rw-r--r-- arm_compute/graph/nodes/NodesFwd.h | 2
-rw-r--r-- arm_compute/graph/printers/DotGraphPrinter.h | 2
-rw-r--r-- examples/graph_alexnet.cpp | 27
-rw-r--r-- examples/graph_googlenet.cpp | 51
-rw-r--r-- examples/graph_inception_v3.cpp | 129
-rw-r--r-- examples/graph_inception_v4.cpp | 163
-rw-r--r-- examples/graph_mobilenet.cpp | 2
-rw-r--r-- examples/graph_resnet50.cpp | 35
-rw-r--r-- examples/graph_resnext50.cpp | 32
-rw-r--r-- examples/graph_squeezenet.cpp | 54
-rw-r--r-- examples/graph_squeezenet_v1_1.cpp | 58
-rw-r--r-- src/core/CL/cl_kernels/pooling_layer.cl | 4
-rw-r--r-- src/core/CL/kernels/CLIm2ColKernel.cpp | 1
-rw-r--r-- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 22
-rw-r--r-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 4
-rw-r--r-- src/core/NEON/kernels/NENormalizationLayerKernel.cpp | 50
-rw-r--r-- src/graph/GraphBuilder.cpp | 18
-rw-r--r-- src/graph/backends/CL/CLFunctionsFactory.cpp | 4
-rw-r--r-- src/graph/backends/GLES/GCFunctionsFactory.cpp | 50
-rw-r--r-- src/graph/backends/GLES/GCNodeValidator.cpp | 6
-rw-r--r-- src/graph/backends/NEON/NEFunctionFactory.cpp | 8
-rw-r--r-- src/graph/mutators/DepthConcatSubTensorMutator.cpp | 14
-rw-r--r-- src/graph/nodes/ConcatenateLayerNode.cpp (renamed from src/graph/nodes/DepthConcatenateLayerNode.cpp) | 62
-rw-r--r-- src/graph/printers/DotGraphPrinter.cpp | 10
-rw-r--r-- utils/CommonGraphOptions.cpp | 6
-rw-r--r-- utils/CommonGraphOptions.h | 2
-rw-r--r-- utils/TypePrinter.h | 29
42 files changed, 704 insertions, 517 deletions
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index f2d37a781c..beeb8b838e 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -72,7 +72,7 @@ private:
const ICLTensor *_input;
ICLTensor *_output;
BorderSize _border_size;
- bool _is_in_map;
+ bool _is_norm_across_width;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */
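The rename above makes the cached flag say what it actually means once NHWC is in play: "in-map" normalization is no longer simply "along dimension 0". A toy restatement of the rule the flag caches (hypothetical names; the real helper is the new get_normalization_dimension_index added to Helpers.h below):

    #include <cstdio>

    // IN_MAP normalization runs along the width axis, CROSS_MAP along the
    // channel axis; which tensor dimension index that is depends on layout.
    static unsigned int norm_dim_index(bool is_in_map, unsigned int width_idx, unsigned int channel_idx)
    {
        return is_in_map ? width_idx : channel_idx;
    }

    int main()
    {
        // In NHWC, width is dimension 1 and channel is dimension 0.
        std::printf("IN_MAP/NHWC -> %u, CROSS_MAP/NHWC -> %u\n",
                    norm_dim_index(true, 1U, 0U),    // prints 1
                    norm_dim_index(false, 1U, 0U));  // prints 0
        return 0;
    }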
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 374e36442b..ef59323073 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -111,28 +111,6 @@ struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>
};
}
-/** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform
- * to know the number of tiles on the x and y direction
- *
- * @param[in] in_dims Spatial dimensions of the input tensor of convolution layer
- * @param[in] kernel_size Kernel size
- * @param[in] output_tile_size Size of a single output tile
- * @param[in] conv_info Convolution info (i.e. pad, stride,...)
- *
- * @return the number of output tiles along the x and y directions of size "output_tile_size"
- */
-inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
-{
- int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
- int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
-
- // Clamp in case we provide paddings but we have 1D convolution
- num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width));
- num_tiles_y = std::min(num_tiles_y, static_cast<int>(in_dims.height));
-
- return Size2D(num_tiles_x, num_tiles_y);
-}
-
/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
* the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
*
@@ -694,6 +672,44 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord);
* @return The int conversion of the requested data layout index.
*/
inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension);
+
+/** Calculate the normalization dimension index for a given normalization type
+ *
+ * @param[in] layout Data layout of the input and output tensor
+ * @param[in] info Normalization info
+ *
+ * @return Normalization dimension index
+ */
+inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info)
+{
+ const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH);
+ const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL);
+
+ return info.is_in_map() ? width_idx : channel_idx;
+}
+
+/** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform
+ * to know the number of tiles on the x and y direction
+ *
+ * @param[in] in_dims Spatial dimensions of the input tensor of convolution layer
+ * @param[in] kernel_size Kernel size
+ * @param[in] output_tile_size Size of a single output tile
+ * @param[in] conv_info Convolution info (i.e. pad, stride,...)
+ *
+ * @return the number of output tiles along the x and y directions of size "output_tile_size"
+ */
+inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
+{
+ int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
+ int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
+
+ // Clamp in case we provide paddings but we have 1D convolution
+ num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width));
+ num_tiles_y = std::min(num_tiles_y, static_cast<int>(in_dims.height));
+
+ return Size2D(num_tiles_x, num_tiles_y);
+}
+
} // namespace arm_compute
#include "arm_compute/core/Helpers.inl"
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index e5516ba154..dbf26a423d 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -201,15 +201,8 @@ inline TensorShape compute_im2col_fc_shape(const ITensorInfo *input, const int n
inline TensorShape compute_im2col_flatten_shape(const ITensorInfo *input)
{
// The output shape will be the flatten version of the input (i.e. [ width * height * channels, 1, 1, ... ] ). Used for FlattenLayer.
-
- ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 3);
-
TensorShape output_shape{ input->tensor_shape() };
-
- const size_t flatten_shape = input->dimension(0) * input->dimension(1) * input->dimension(2);
- output_shape.set(0, flatten_shape);
- output_shape.remove_dimension(1);
- output_shape.remove_dimension(1);
+ output_shape.collapse(3, 0);
return output_shape;
}
@@ -403,20 +396,25 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
}
template <typename T>
-inline TensorShape get_shape_from_info(T *info)
+inline TensorShape extract_shape(T *data)
+{
+ return data->info()->tensor_shape();
+}
+
+inline TensorShape extract_shape(ITensorInfo *data)
{
- return info->info()->tensor_shape();
+ return data->tensor_shape();
}
-inline TensorShape get_shape_from_info(ITensorInfo *info)
+inline TensorShape extract_shape(const TensorShape *data)
{
- return info->tensor_shape();
+ return *data;
}
template <typename T>
inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector)
{
- TensorShape out_shape = get_shape_from_info(inputs_vector[0]);
+ TensorShape out_shape = extract_shape(inputs_vector[0]);
size_t max_x = 0;
size_t max_y = 0;
@@ -425,7 +423,7 @@ inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inp
for(const auto &tensor : inputs_vector)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
- const TensorShape shape = get_shape_from_info(tensor);
+ const TensorShape shape = extract_shape(tensor);
max_x = std::max(shape.x(), max_x);
max_y = std::max(shape.y(), max_y);
depth += shape.z();
@@ -441,13 +439,13 @@ inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inp
template <typename T>
inline TensorShape calculate_width_concatenate_shape(const std::vector<T *> &inputs_vector)
{
- TensorShape out_shape = get_shape_from_info(inputs_vector[0]);
+ TensorShape out_shape = extract_shape(inputs_vector[0]);
size_t width = 0;
for(const auto &tensor : inputs_vector)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
- const TensorShape shape = get_shape_from_info(tensor);
+ const TensorShape shape = extract_shape(tensor);
width += shape.x();
}
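The extract_shape overloads let calculate_depth_concatenate_shape and calculate_width_concatenate_shape accept tensors, tensor infos, or raw shapes uniformly. The depth rule itself is small enough to restate; a plain-struct sketch (not the real TensorShape):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Shape3D { std::size_t x, y, z; };

    // Depth concatenation: width/height take the maximum across inputs,
    // depth is accumulated (width concatenation sums x instead).
    static Shape3D depth_concat(const std::vector<Shape3D> &in)
    {
        Shape3D out{ 0, 0, 0 };
        for(const auto &s : in)
        {
            out.x = std::max(out.x, s.x);
            out.y = std::max(out.y, s.y);
            out.z += s.z;
        }
        return out;
    }

    int main()
    {
        // 56x56x64 concatenated with 56x56x32 along depth -> 56x56x96
        const Shape3D out = depth_concat({ { 56, 56, 64 }, { 56, 56, 32 } });
        std::printf("%zux%zux%zu\n", out.x, out.y, out.z);
        return 0;
    }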
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index 6359e05a63..191848c15f 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -128,8 +128,8 @@ public:
* @return Node ID of the created node, EmptyNodeID in case of error
*/
static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
- Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
- unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED,
+ Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info, unsigned int num_groups = 1,
+ ConvolutionMethod method = ConvolutionMethod::Default, FastMathHint fast_math_hint = FastMathHint::Disabled,
ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr,
const QuantizationInfo weights_quant_info = QuantizationInfo(),
const QuantizationInfo out_quant_info = QuantizationInfo());
@@ -155,10 +155,11 @@ public:
* @param[in] g Graph to add the node to
* @param[in] params Common node parameters
* @param[in] inputs Inputs to the depth concatenate layer node as a NodeID-Index pair
+ * @param[in] axis Concatenation axis
*
* @return Node ID of the created node, EmptyNodeID in case of error
*/
- static NodeID add_depth_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs);
+ static NodeID add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis);
/** Adds a depth-wise convolution layer node to the graph
*
* @param[in] g Graph to add the node to
@@ -175,7 +176,7 @@ public:
*/
static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, PadStrideInfo conv_info,
- DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT,
+ DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default,
ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo());
/** Adds an element-wise layer node to the graph
*
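The new add_concatenate_node generalizes the old depth-only entry point by taking the concatenation axis explicitly. A hedged call-site sketch (names and surrounding setup are illustrative; it assumes a graph, node params, and two producer nodes already exist):

    #include "arm_compute/graph/GraphBuilder.h"
    #include "arm_compute/graph/Types.h"

    #include <vector>

    using namespace arm_compute::graph;

    // Illustrative migration from add_depth_concatenate_node: the old
    // behaviour is the CHANNEL axis of the new, axis-aware node.
    NodeID concat_two_branches(Graph &g, NodeParams params, NodeID branch0, NodeID branch1)
    {
        std::vector<NodeIdxPair> inputs = { { branch0, 0 }, { branch1, 0 } };
        return GraphBuilder::add_concatenate_node(g, params, inputs, DataLayoutDimension::CHANNEL);
    }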
diff --git a/arm_compute/graph/INodeVisitor.h b/arm_compute/graph/INodeVisitor.h
index b5446c4a55..ad390ad760 100644
--- a/arm_compute/graph/INodeVisitor.h
+++ b/arm_compute/graph/INodeVisitor.h
@@ -51,6 +51,11 @@ public:
* @param[in] n Node to visit.
*/
virtual void visit(BatchNormalizationLayerNode &n) = 0;
+ /** Visit ConcatenateLayerNode.
+ *
+ * @param[in] n Node to visit.
+ */
+ virtual void visit(ConcatenateLayerNode &n) = 0;
/** Visit ConstNode.
*
* @param[in] n Node to visit.
@@ -61,11 +66,6 @@ public:
* @param[in] n Node to visit.
*/
virtual void visit(ConvolutionLayerNode &n) = 0;
- /** Visit DepthConcatenateLayerNode.
- *
- * @param[in] n Node to visit.
- */
- virtual void visit(DepthConcatenateLayerNode &n) = 0;
/** Visit DepthwiseConvolutionLayerNode.
*
* @param[in] n Node to visit.
@@ -148,15 +148,15 @@ public:
{
default_visit();
}
- virtual void visit(ConstNode &n) override
+ virtual void visit(ConcatenateLayerNode &n) override
{
default_visit();
}
- virtual void visit(ConvolutionLayerNode &n) override
+ virtual void visit(ConstNode &n) override
{
default_visit();
}
- virtual void visit(DepthConcatenateLayerNode &n) override
+ virtual void visit(ConvolutionLayerNode &n) override
{
default_visit();
}
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index c3601f2373..7c0bd8cfdd 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -71,15 +71,15 @@ inline ::std::ostream &operator<<(::std::ostream &os, const NodeType &node_type)
case NodeType::ChannelShuffleLayer:
os << "ChannelShuffleLayer";
break;
+ case NodeType::ConcatenateLayer:
+ os << "ConcatenateLayer";
+ break;
case NodeType::ConvolutionLayer:
os << "ConvolutionLayer";
break;
case NodeType::DeconvolutionLayer:
os << "DeconvolutionLayer";
break;
- case NodeType::DepthConcatenateLayer:
- os << "DepthConcatenateLayer";
- break;
case NodeType::DepthwiseConvolutionLayer:
os << "DepthwiseConvolutionLayer";
break;
@@ -134,14 +134,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const EltwiseOperation &el
{
switch(eltwise_op)
{
- case EltwiseOperation::ADD:
- os << "ADD";
+ case EltwiseOperation::Add:
+ os << "Add";
break;
- case EltwiseOperation::MUL:
- os << "MUL";
+ case EltwiseOperation::Mul:
+ os << "Mul";
break;
- case EltwiseOperation::SUB:
- os << "SUB";
+ case EltwiseOperation::Sub:
+ os << "Sub";
break;
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
@@ -155,17 +155,17 @@ inline ::std::ostream &operator<<(::std::ostream &os, const ConvolutionMethod &m
{
switch(method)
{
- case ConvolutionMethod::DEFAULT:
- os << "DEFAULT";
+ case ConvolutionMethod::Default:
+ os << "Default";
break;
- case ConvolutionMethod::DIRECT:
- os << "DIRECT";
+ case ConvolutionMethod::Direct:
+ os << "Direct";
break;
case ConvolutionMethod::GEMM:
os << "GEMM";
break;
- case ConvolutionMethod::WINOGRAD:
- os << "WINOGRAD";
+ case ConvolutionMethod::Winograd:
+ os << "Winograd";
break;
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
@@ -179,11 +179,11 @@ inline ::std::ostream &operator<<(::std::ostream &os, const FastMathHint &hint)
{
switch(hint)
{
- case FastMathHint::ENABLED:
- os << "ENABLED";
+ case FastMathHint::Enabled:
+ os << "Enabled";
break;
- case FastMathHint::DISABLED:
- os << "DISABLED";
+ case FastMathHint::Disabled:
+ os << "Disabled";
break;
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
@@ -197,14 +197,14 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DepthwiseConvolution
{
switch(method)
{
- case DepthwiseConvolutionMethod::DEFAULT:
+ case DepthwiseConvolutionMethod::Default:
os << "DEFAULT";
break;
case DepthwiseConvolutionMethod::GEMV:
os << "GEMV";
break;
- case DepthwiseConvolutionMethod::OPTIMIZED_3x3:
- os << "OPTIMIZED_3x3";
+ case DepthwiseConvolutionMethod::Optimized3x3:
+ os << "Optimized3x3";
break;
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index c5b7fb1c51..f22f50ac82 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -96,33 +96,33 @@ enum class Target
/** Supported Element-wise operations */
enum class EltwiseOperation
{
- ADD, /**< Arithmetic addition */
- SUB, /**< Arithmetic subtraction */
- MUL /**< Arithmetic multiplication */
+ Add, /**< Arithmetic addition */
+ Sub, /**< Arithmetic subtraction */
+ Mul /**< Arithmetic multiplication */
};
/** Supported Convolution layer methods */
enum class ConvolutionMethod
{
- DEFAULT, /**< Default approach using internal heuristics */
+ Default, /**< Default approach using internal heuristics */
GEMM, /**< GEMM based convolution */
- DIRECT, /**< Deep direct convolution */
- WINOGRAD /**< Winograd based convolution */
+ Direct, /**< Deep direct convolution */
+ Winograd /**< Winograd based convolution */
};
/** Supported Depthwise Convolution layer methods */
enum class DepthwiseConvolutionMethod
{
- DEFAULT, /**< Default approach using internal heuristics */
- GEMV, /**< Generic GEMV based depthwise convolution */
- OPTIMIZED_3x3, /**< Optimized 3x3 direct depthwise convolution */
+ Default, /**< Default approach using internal heuristics */
+ GEMV, /**< Generic GEMV based depthwise convolution */
+ Optimized3x3, /**< Optimized 3x3 direct depthwise convolution */
};
/** Enable or disable fast math for Convolution layer */
enum class FastMathHint
{
- ENABLED, /**< Fast math enabled for Convolution layer */
- DISABLED, /**< Fast math disabled for Convolution layer */
+ Enabled, /**< Fast math enabled for Convolution layer */
+ Disabled, /**< Fast math disabled for Convolution layer */
};
/** Supported nodes */
@@ -131,9 +131,9 @@ enum class NodeType
ActivationLayer,
BatchNormalizationLayer,
ChannelShuffleLayer,
+ ConcatenateLayer,
ConvolutionLayer,
DeconvolutionLayer,
- DepthConcatenateLayer,
DepthwiseConvolutionLayer,
EltwiseLayer,
FlattenLayer,
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 978d3bc1a8..172f00277e 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -192,6 +192,52 @@ std::unique_ptr<IFunction> create_channel_shuffle_layer(ChannelShuffleLayerNode
return std::move(func);
}
+/** Create a backend layer concatenate function
+ *
+ * @tparam ConcatenateLayerFunction Backend concatenate function
+ * @tparam TargetInfo Target-specific information
+ *
+ * @param[in] node Node to create the backend function for
+ *
+ * @return Backend concatenate layer function
+ */
+template <typename ConcatenateLayerFunction, typename TargetInfo>
+std::unique_ptr<arm_compute::IFunction> create_concatenate_layer(ConcatenateLayerNode &node)
+{
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
+ ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+
+ // Return nullptr if depth concatenate is switched off
+ if(!node.is_enabled())
+ {
+ return nullptr;
+ }
+
+ // Extract IO and info
+ std::vector<typename TargetInfo::TensorType *> inputs;
+ for(unsigned int i = 0; i < node.num_inputs(); ++i)
+ {
+ inputs.push_back(get_backing_tensor<TargetInfo>(node.input(i)));
+ }
+ typename TargetInfo::TensorType *output = get_backing_tensor<TargetInfo>(node.output(0));
+ const DataLayoutDimension concat_axis = node.concatenation_axis();
+
+ // Create and configure function
+ auto func = support::cpp14::make_unique<ConcatenateLayerFunction>();
+ func->configure(inputs, output, concat_axis);
+
+ // Log info
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+ << " Target " << TargetInfo::TargetType
+ << " Data Type: " << output->info()->data_type()
+ << " Shape: " << output->info()->tensor_shape()
+ << " Num Inputs: " << inputs.size()
+ << " Axis: " << concat_axis
+ << std::endl);
+
+ return std::move(func);
+}
+
/** Create a backend convolution layer function
*
* @tparam ConvolutionLayerFunctions Backend convolution functions
@@ -220,20 +266,20 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
- const bool fast_math = node.fast_math_hint() == FastMathHint::ENABLED;
+ const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
// Create and configure function (we assume that functions have been validated before creation)
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, TargetInfo::TargetType);
std::unique_ptr<IFunction> func;
std::string func_name;
- if(conv_algorithm == ConvolutionMethod::WINOGRAD)
+ if(conv_algorithm == ConvolutionMethod::Winograd)
{
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::WinogradConvolutionLayer>(
std::string("WinogradConvolutionLayer"), mm,
input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
}
- else if(conv_algorithm == ConvolutionMethod::DIRECT)
+ else if(conv_algorithm == ConvolutionMethod::Direct)
{
std::tie(func, func_name) = create_named_function<typename ConvolutionLayerFunctions::DirectConvolutionLayer>(
std::string("DirectConvolutionLayer"),
@@ -308,50 +354,6 @@ std::unique_ptr<IFunction> create_deconvolution_layer(DeconvolutionLayerNode &no
return func;
}
-/** Create a backend layer depth concatenate function
- *
- * @tparam DepthConcatenateLayerFunction Backend depth concatenate function
- * @tparam TargetInfo Target-specific information
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-template <typename DepthConcatenateLayerFunction, typename TargetInfo>
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Return nullptr if depth concatenate is switched off
- if(!node.is_enabled())
- {
- return nullptr;
- }
-
- // Extract IO and info
- std::vector<typename TargetInfo::TensorType *> inputs;
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
- {
- inputs.push_back(get_backing_tensor<TargetInfo>(node.input(i)));
- }
- typename TargetInfo::TensorType *output = get_backing_tensor<TargetInfo>(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<DepthConcatenateLayerFunction>();
- func->configure(inputs, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
- << " Target " << TargetInfo::TargetType
- << " Data Type: " << output->info()->data_type()
- << " Shape: " << output->info()->tensor_shape()
- << " Num Inputs: " << inputs.size()
- << std::endl);
-
- return std::move(func);
-}
-
/** Create a backend layer depth-wise convolution function
*
* @tparam DepthwiseConvolutionLayerFunctions Backend depthwise convolution function
@@ -383,7 +385,7 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
// Create and configure function (we assume that functions have been validated before creation)
std::unique_ptr<IFunction> func;
std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
+ if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
{
std::tie(func, func_name) = create_named_function<typename DepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
std::string("DepthwiseConvolutionLayer3x3"),
@@ -435,19 +437,19 @@ std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
std::unique_ptr<IFunction> func = nullptr;
std::string func_name;
- if(eltwise_op == EltwiseOperation::ADD)
+ if(eltwise_op == EltwiseOperation::Add)
{
std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Addition>(
std::string("ArithmeticAddition"),
input1, input2, output, convert_policy);
}
- else if(eltwise_op == EltwiseOperation::SUB)
+ else if(eltwise_op == EltwiseOperation::Sub)
{
std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Subtraction>(
std::string("ArithmeticSubtraction"),
input1, input2, output, convert_policy);
}
- else if(eltwise_op == EltwiseOperation::MUL)
+ else if(eltwise_op == EltwiseOperation::Mul)
{
std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Multiplication>(
std::string("PixelWiseMultiplication"),
@@ -487,11 +489,12 @@ std::unique_ptr<IFunction> create_flatten_layer(FlattenLayerNode &node)
typename TargetInfo::TensorType *input = get_backing_tensor<TargetInfo>(node.input(0));
typename TargetInfo::TensorType *output = get_backing_tensor<TargetInfo>(node.output(0));
+ ARM_COMPUTE_ERROR_ON(input == nullptr);
+ ARM_COMPUTE_ERROR_ON(output == nullptr);
+
// Create and configure function
auto func = support::cpp14::make_unique<FlattenLayerFunction>();
func->configure(input, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
@@ -526,13 +529,14 @@ std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode
typename TargetInfo::TensorType *output = get_backing_tensor<TargetInfo>(node.output(0));
const FullyConnectedLayerInfo fc_info = node.info();
- // Create and configure function
- auto func = support::cpp14::make_unique<FullyConnectedLayerFunction>(get_memory_manager(ctx, TargetInfo::TargetType));
- func->configure(input, weights, biases, output, fc_info);
ARM_COMPUTE_ERROR_ON(input == nullptr);
ARM_COMPUTE_ERROR_ON(weights == nullptr);
ARM_COMPUTE_ERROR_ON(output == nullptr);
+ // Create and configure function
+ auto func = support::cpp14::make_unique<FullyConnectedLayerFunction>(get_memory_manager(ctx, TargetInfo::TargetType));
+ func->configure(input, weights, biases, output, fc_info);
+
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
<< " Target " << TargetInfo::TargetType
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index 189fbdc9c7..ae52593b03 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -106,22 +106,22 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
- //const bool fast_math = node.fast_math_hint() == FastMathHint::ENABLED; // FIXME (COMPMID-1138): uncomment once NEON and GLES support fast_math
+ const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
// Validate function
Status status{};
switch(conv_algorithm)
{
- case ConvolutionMethod::DIRECT:
+ case ConvolutionMethod::Direct:
status = DirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
break;
case ConvolutionMethod::GEMM:
status = GEMMConvolutionLayer::validate(input, weights, biases, output, conv_info);
break;
- case ConvolutionMethod::WINOGRAD:
- status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info /*, fast_math*/);
+ case ConvolutionMethod::Winograd:
+ status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
break;
- case ConvolutionMethod::DEFAULT:
+ case ConvolutionMethod::Default:
status = ConvolutionLayer::validate(input, weights, biases, output, conv_info);
break;
default:
@@ -136,7 +136,7 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
{
ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
<< node.id() << " and Name: " << node.name() << std::endl);
- node.set_convolution_method(ConvolutionMethod::DEFAULT);
+ node.set_convolution_method(ConvolutionMethod::Default);
}
}
@@ -166,11 +166,11 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
// TODO (geopin01) : Switch when validation is implemented
// Validate function
- if((dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3))
+ if((dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3))
{
ARM_COMPUTE_LOG_GRAPH_INFO("Switched DepthwiseConvolutionLayer method of node with ID : "
<< node.id() << " and Name: " << node.name() << std::endl);
- node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::DEFAULT);
+ node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
}
return Status{};
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 197d2ea409..02ef56952d 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -595,7 +595,7 @@ public:
}
else if(_branch_merge_method == BranchMergeMethod::DEPTH_CONCATENATE)
{
- // Collect tail nodes and perform DepthConcatenate
+ // Collect tail nodes and concatenate
std::vector<NodeIdxPair> nodes;
for(auto &ss : _sub_streams)
{
@@ -608,14 +608,14 @@ public:
}
}
}
- nid = GraphBuilder::add_depth_concatenate_node(s.graph(), common_params, nodes);
+ nid = GraphBuilder::add_concatenate_node(s.graph(), common_params, nodes, DataLayoutDimension::CHANNEL);
}
else
{
ARM_COMPUTE_ERROR_ON(_sub_streams.size() != 2);
NodeIdxPair input0 = { _sub_streams[0]->tail_node(), 0 };
NodeIdxPair input1 = { _sub_streams[1]->tail_node(), 0 };
- nid = GraphBuilder::add_elementwise_node(s.graph(), common_params, input0, input1, EltwiseOperation::ADD);
+ nid = GraphBuilder::add_elementwise_node(s.graph(), common_params, input0, input1, EltwiseOperation::Add);
}
return nid;
}
diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h
index cd579e2119..f9d4952765 100644
--- a/arm_compute/graph/frontend/Types.h
+++ b/arm_compute/graph/frontend/Types.h
@@ -64,9 +64,9 @@ enum class BranchMergeMethod
struct StreamHints
{
Target target_hint = { Target::UNSPECIFIED }; /**< Target execution hint */
- ConvolutionMethod convolution_method_hint = { ConvolutionMethod::DEFAULT }; /**< Convolution method hint */
- DepthwiseConvolutionMethod depthwise_convolution_method_hint = { DepthwiseConvolutionMethod::DEFAULT }; /**< Depthwise Convolution method hint */
- FastMathHint fast_math_hint = { FastMathHint::DISABLED }; /**< Fast math hint */
+ ConvolutionMethod convolution_method_hint = { ConvolutionMethod::Default }; /**< Convolution method hint */
+ DepthwiseConvolutionMethod depthwise_convolution_method_hint = { DepthwiseConvolutionMethod::Default }; /**< Depthwise Convolution method hint */
+ FastMathHint fast_math_hint = { FastMathHint::Disabled }; /**< Fast math hint */
};
} // namespace frontend
} // namespace graph
diff --git a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h b/arm_compute/graph/nodes/ConcatenateLayerNode.h
index ffdec709ef..20c8523752 100644
--- a/arm_compute/graph/nodes/DepthConcatenateLayerNode.h
+++ b/arm_compute/graph/nodes/ConcatenateLayerNode.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__
-#define __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__
+#ifndef __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__
+#define __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__
#include "arm_compute/graph/INode.h"
@@ -30,30 +30,31 @@ namespace arm_compute
{
namespace graph
{
-/** Depth Concatenation Layer node */
-class DepthConcatenateLayerNode final : public INode
+/** Concatenation Layer node */
+class ConcatenateLayerNode final : public INode
{
public:
/** Constructor
*
* @param[in] total_nodes Number of nodes that will get concatenated
+ * @param[in] axis Concatenation axis
*/
- DepthConcatenateLayerNode(unsigned int total_nodes);
- /** Computes depth concatenations output descriptor
+ ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis);
+ /** Computes concatenations output descriptor
*
* @param[in] input_descriptors Input descriptors
+ * @param[in] axis Concatenation axis
*
* @return Expected output descriptor
*/
- static TensorDescriptor compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors);
+ static TensorDescriptor compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors, DataLayoutDimension axis);
/** Disables or not the depth concatenate node
*
- * @warning This is used when depth concatenate is performed with sub-tensors,
- * where this node is used as a placeholder.
+ * @warning This is used when concatenate is performed using sub-tensors, where this node is used as a placeholder.
*
- * @param[in] is_enabled If true a backend function is created to perform the depth concatenation (involves copying),
- * while if false, no function is created and we assume that subtensors are properly set to simulate
- * a no copy operation.
+ * @param[in] is_enabled If true a backend function is created to perform the concatenation (involves copying),
+ * while if false, no function is created and we assume that sub-tensors are properly set to simulate
+ * a zero copy operation.
*/
void set_enabled(bool is_enabled);
/** Enabled parameter accessor
@@ -61,6 +62,11 @@ public:
* @return True if a backend function is to be created else false
*/
bool is_enabled() const;
+ /** Concatenation axis parameter accessor
+ *
+ * @return Concatenation axis
+ */
+ DataLayoutDimension concatenation_axis() const;
// Inherited overridden methods:
NodeType type() const override;
@@ -69,9 +75,10 @@ public:
void accept(INodeVisitor &v) override;
private:
- unsigned int _total_nodes;
- bool _is_enabled;
+ unsigned int _total_nodes;
+ DataLayoutDimension _axis;
+ bool _is_enabled;
};
} // namespace graph
} // namespace arm_compute
-#endif /* __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_NODE_H__ */
+#endif /* __ARM_COMPUTE_GRAPH_CONCATENATE_LAYER_NODE_H__ */
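Storing a DataLayoutDimension on the node, rather than a raw dimension index, is what makes the concatenation layout-agnostic: the same logical axis maps to different tensor indices per layout. A small sketch of that mapping (assumed orderings: NCHW stores [W, H, C, N] in dimensions 0..3, NHWC stores [C, W, H, N]):

    #include <cstdio>

    enum class Layout { NCHW, NHWC };
    enum class Dim { WIDTH, HEIGHT, CHANNEL };

    // Hypothetical stand-in for get_data_layout_dimension_index.
    static unsigned int dim_index(Layout layout, Dim dim)
    {
        if(layout == Layout::NCHW)
        {
            return dim == Dim::WIDTH ? 0U : (dim == Dim::HEIGHT ? 1U : 2U);
        }
        // NHWC: channels are innermost
        return dim == Dim::CHANNEL ? 0U : (dim == Dim::WIDTH ? 1U : 2U);
    }

    int main()
    {
        std::printf("CHANNEL index: NCHW=%u NHWC=%u\n",
                    dim_index(Layout::NCHW, Dim::CHANNEL),
                    dim_index(Layout::NHWC, Dim::CHANNEL)); // 2 vs 0
        return 0;
    }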
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index aca60283d7..4299be6bb5 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -41,8 +41,10 @@ public:
* @param[in] fast_math_hint (Optional) Fast math hint
* @param[in] out_quant_info (Optional) Output quantization info
*/
- ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED,
- QuantizationInfo out_quant_info = QuantizationInfo());
+ ConvolutionLayerNode(PadStrideInfo info,
+ ConvolutionMethod method = ConvolutionMethod::Default,
+ FastMathHint fast_math_hint = FastMathHint::Disabled,
+ QuantizationInfo out_quant_info = QuantizationInfo());
/** Sets the convolution layer method to use
*
* @param[in] method Method to use for convolution
diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
index df6f456ac9..1a173c5421 100644
--- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
@@ -39,7 +39,7 @@ public:
* @param[in] info Convolution layer attributes
* @param[in] method Depthwise convolution method to use
*/
- DepthwiseConvolutionLayerNode(PadStrideInfo info, DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::DEFAULT);
+ DepthwiseConvolutionLayerNode(PadStrideInfo info, DepthwiseConvolutionMethod method = DepthwiseConvolutionMethod::Default);
/** Sets the depthwise convolution method to use
*
* @param[in] method Depthwise convolution method to use
diff --git a/arm_compute/graph/nodes/Nodes.h b/arm_compute/graph/nodes/Nodes.h
index 97aa191916..f2e751e15f 100644
--- a/arm_compute/graph/nodes/Nodes.h
+++ b/arm_compute/graph/nodes/Nodes.h
@@ -27,10 +27,10 @@
#include "arm_compute/graph/nodes/ActivationLayerNode.h"
#include "arm_compute/graph/nodes/BatchNormalizationLayerNode.h"
#include "arm_compute/graph/nodes/ChannelShuffleLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/graph/nodes/ConstNode.h"
#include "arm_compute/graph/nodes/ConvolutionLayerNode.h"
#include "arm_compute/graph/nodes/DeconvolutionLayerNode.h"
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"
#include "arm_compute/graph/nodes/DummyNode.h"
#include "arm_compute/graph/nodes/EltwiseLayerNode.h"
diff --git a/arm_compute/graph/nodes/NodesFwd.h b/arm_compute/graph/nodes/NodesFwd.h
index 05979d796c..a0a9146dc4 100644
--- a/arm_compute/graph/nodes/NodesFwd.h
+++ b/arm_compute/graph/nodes/NodesFwd.h
@@ -33,10 +33,10 @@ class INode;
class ActivationLayerNode;
class BatchNormalizationLayerNode;
class ChannelShuffleLayerNode;
+class ConcatenateLayerNode;
class ConstNode;
class ConvolutionLayerNode;
class DeconvolutionLayerNode;
-class DepthConcatenateLayerNode;
class DepthwiseConvolutionLayerNode;
class DummyNode;
class EltwiseLayerNode;
diff --git a/arm_compute/graph/printers/DotGraphPrinter.h b/arm_compute/graph/printers/DotGraphPrinter.h
index 1d355a52ee..d4cf6928e5 100644
--- a/arm_compute/graph/printers/DotGraphPrinter.h
+++ b/arm_compute/graph/printers/DotGraphPrinter.h
@@ -52,8 +52,8 @@ public:
// Inherited methods overridden
void visit(ActivationLayerNode &n) override;
void visit(BatchNormalizationLayerNode &n) override;
+ void visit(ConcatenateLayerNode &n) override;
void visit(ConvolutionLayerNode &n) override;
- void visit(DepthConcatenateLayerNode &n) override;
void visit(DepthwiseConvolutionLayerNode &n) override;
void visit(EltwiseLayerNode &n) override;
void visit(NormalizationLayerNode &n) override;
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 63e7b16128..944a435c3b 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -72,14 +71,20 @@ public:
const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor)))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
// Layer 1
<< ConvolutionLayer(
11U, 11U, 96U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"),
PadStrideInfo(4, 4, 0, 0))
.set_name("conv1")
@@ -89,7 +94,7 @@ public:
// Layer 2
<< ConvolutionLayer(
5U, 5U, 256U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"),
PadStrideInfo(1, 1, 2, 2), 2)
.set_name("conv2")
@@ -99,7 +104,7 @@ public:
// Layer 3
<< ConvolutionLayer(
3U, 3U, 384U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"),
PadStrideInfo(1, 1, 1, 1))
.set_name("conv3")
@@ -107,7 +112,7 @@ public:
// Layer 4
<< ConvolutionLayer(
3U, 3U, 384U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"),
PadStrideInfo(1, 1, 1, 1), 2)
.set_name("conv4")
@@ -115,7 +120,7 @@ public:
// Layer 5
<< ConvolutionLayer(
3U, 3U, 256U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"),
PadStrideInfo(1, 1, 1, 1), 2)
.set_name("conv5")
@@ -124,21 +129,21 @@ public:
// Layer 6
<< FullyConnectedLayer(
4096U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy"))
.set_name("fc6")
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6")
// Layer 7
<< FullyConnectedLayer(
4096U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy"))
.set_name("fc7")
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7")
// Layer 8
<< FullyConnectedLayer(
1000U,
- get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy"))
.set_name("fc8")
// Softmax
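The pattern above is what actually enables NHWC in each example: the canonical NCHW input shape is permuted into the requested layout, while the weights keep their trained NCHW layout so the accessors can convert them. A plain-array sketch of what the permute_shape call does, under an assumed convention that NCHW stores [W, H, C, N] in dimensions 0..3 and NHWC stores [C, W, H, N]:

    #include <array>
    #include <cstdio>

    // Sketch of permute_shape(TensorShape(227U, 227U, 3U, 1U),
    // DataLayout::NCHW, DataLayout::NHWC) under the orderings above.
    static std::array<unsigned int, 4> nchw_to_nhwc(const std::array<unsigned int, 4> &s)
    {
        // [W, H, C, N] -> [C, W, H, N]
        return { s[2], s[0], s[1], s[3] };
    }

    int main()
    {
        const auto nhwc = nchw_to_nhwc({ 227U, 227U, 3U, 1U });
        std::printf("%u %u %u %u\n", nhwc[0], nhwc[1], nhwc[2], nhwc[3]); // 3 227 227 1
        return 0;
    }

The same input-descriptor and weights_layout plumbing repeats in the GoogLeNet and Inception examples that follow.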
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 4497dbd470..d5bd0c0552 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -72,13 +71,19 @@ public:
const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor)))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
<< ConvolutionLayer(
7U, 7U, 64U,
- get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"),
PadStrideInfo(2, 2, 3, 3))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
@@ -86,33 +91,33 @@ public:
<< NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
<< ConvolutionLayer(
1U, 1U, 64U,
- get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(
3U, 3U, 192U,
- get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"),
PadStrideInfo(1, 1, 1, 1))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
- graph << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
- graph << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
+ graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
+ graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
- graph << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
- graph << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
- graph << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
- graph << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
- graph << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+ graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
+ graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
- graph << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
- graph << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
+ graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+ graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
<< FullyConnectedLayer(
1000U,
- get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy"))
<< SoftmaxLayer()
<< OutputLayer(get_output_accessor(common_params, 5));
@@ -139,7 +144,7 @@ private:
CommonGraphParams common_params;
Stream graph;
- BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path,
+ BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
unsigned int a_filt,
std::tuple<unsigned int, unsigned int> b_filters,
std::tuple<unsigned int, unsigned int> c_filters,
@@ -149,7 +154,7 @@ private:
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, a_filt,
- get_weights_accessor(data_path, total_path + "1x1_w.npy"),
+ get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -157,13 +162,13 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy"),
+ get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(
3U, 3U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "3x3_w.npy"),
+ get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "3x3_b.npy"),
PadStrideInfo(1, 1, 1, 1))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -171,13 +176,13 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, std::get<0>(c_filters),
- get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy"),
+ get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(
5U, 5U, std::get<1>(c_filters),
- get_weights_accessor(data_path, total_path + "5x5_w.npy"),
+ get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "5x5_b.npy"),
PadStrideInfo(1, 1, 2, 2))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -186,7 +191,7 @@ private:
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, d_filt,
- get_weights_accessor(data_path, total_path + "pool_proj_w.npy"),
+ get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "pool_proj_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index 67f4e3cacf..c41b0a808e 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -71,12 +70,18 @@ public:
// Create a preprocessor object
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor), false))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false))
<< ConvolutionLayer(3U, 3U, 32U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
.set_name("Conv2d_1a_3x3/convolution")
<< BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -89,7 +94,7 @@ public:
.set_name("Conv2d_1a_3x3/BatchNorm/batchnorm")
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu")
<< ConvolutionLayer(3U, 3U, 32U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
.set_name("Conv2d_2a_3x3/convolution")
<< BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -103,7 +108,7 @@ public:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu")
<< ConvolutionLayer(3U, 3U, 64U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
.set_name("Conv2d_2b_3x3/convolution")
<< BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -119,7 +124,7 @@ public:
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool")
<< ConvolutionLayer(1U, 1U, 80U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
.set_name("Conv2d_3b_1x1/convolution")
<< BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -133,7 +138,7 @@ public:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu")
<< ConvolutionLayer(3U, 3U, 192U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
.set_name("Conv2d_4a_3x3/convolution")
<< BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -148,45 +153,45 @@ public:
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool");
- graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
32U)
.set_name("Mixed_5b/concat");
- graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
64U, true)
.set_name("Mixed_5c/concat");
- graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
64U)
.set_name("Mixed_5d/concat");
- graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
+ graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
- graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
+ graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U),
std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U)
.set_name("Mixed_6b/concat");
- graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
+ graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
.set_name("Mixed_6c/concat");
- graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
+ graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
.set_name("Mixed_6d/concat");
- graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
+ graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U),
std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U)
.set_name("Mixed_6e/concat");
- graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
+ graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U),
std::make_tuple(192U, 192U, 192U, 192U))
.set_name("Mixed_7a/concat");
- graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
+ graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
std::make_tuple(448U, 384U, 384U, 384U), 192U)
.set_name("Mixed_7b/concat");
- graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
+ graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
std::make_tuple(448U, 384U, 384U, 384U), 192U, true)
.set_name("Mixed_7c/concat");
graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool")
<< ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path,
- "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"),
+ "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout),
get_weights_accessor(data_path,
"/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"),
PadStrideInfo(1, 1, 0, 0))
@@ -218,7 +223,7 @@ private:
Stream graph;
private:
- BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path,
+ BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
unsigned int a_filt,
std::tuple<unsigned int, unsigned int> b_filters,
std::tuple<unsigned int, unsigned int, unsigned int> c_filters,
@@ -239,7 +244,7 @@ private:
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, a_filt,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -255,7 +260,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution")
@@ -269,7 +274,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu")
<< ConvolutionLayer(
5U, 5U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 2, 2))
.set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution")
@@ -285,7 +290,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, std::get<0>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -299,7 +304,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<1>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 1))
.set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -313,7 +318,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<2>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 1))
.set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution")
@@ -330,7 +335,7 @@ private:
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
<< ConvolutionLayer(
1U, 1U, d_filt,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -346,7 +351,7 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
}
- BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path,
+ BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
unsigned int a_filt,
std::tuple<unsigned int, unsigned int, unsigned int> b_filters)
{
@@ -354,7 +359,7 @@ private:
SubStream i_a(graph);
i_a << ConvolutionLayer(
3U, 3U, a_filt,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(2, 2, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution")
@@ -370,7 +375,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -384,7 +389,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 1))
.set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution")
@@ -398,7 +403,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<2>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(2, 2, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution")
@@ -417,7 +422,7 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
- BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path,
+ BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
unsigned int a_filt,
std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -427,7 +432,7 @@ private:
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, a_filt,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -443,7 +448,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -457,7 +462,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
7U, 1U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 3, 0))
.set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -471,7 +476,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
<< ConvolutionLayer(
1U, 7U, std::get<2>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 3))
.set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -487,7 +492,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, std::get<0>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -501,7 +506,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
1U, 7U, std::get<1>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 3))
.set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution")
@@ -515,7 +520,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu")
<< ConvolutionLayer(
7U, 1U, std::get<2>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 3, 0))
.set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution")
@@ -529,7 +534,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu")
<< ConvolutionLayer(
1U, 7U, std::get<3>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 3))
.set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution")
@@ -543,7 +548,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu")
<< ConvolutionLayer(
7U, 1U, std::get<4>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 3, 0))
.set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution")
@@ -560,7 +565,7 @@ private:
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
<< ConvolutionLayer(
1U, 1U, d_filt,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -576,15 +581,15 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
}
- BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path,
- std::tuple<unsigned int, unsigned int> a_filters,
+ BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+ std::tuple<unsigned int, unsigned int> a_filters,
std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters)
{
std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, std::get<0>(a_filters),
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -598,7 +603,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<1>(a_filters),
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(2, 2, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution")
@@ -614,7 +619,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -628,7 +633,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
7U, 1U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 3, 0))
.set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -642,7 +647,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
<< ConvolutionLayer(
1U, 7U, std::get<2>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 3))
.set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -656,7 +661,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<3>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(2, 2, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution")
@@ -675,7 +680,7 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
- BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path,
+ BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
unsigned int a_filt,
std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -693,7 +698,7 @@ private:
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, a_filt,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -709,7 +714,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, std::get<0>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -725,7 +730,7 @@ private:
SubStream i_b1(i_b);
i_b1 << ConvolutionLayer(
3U, 1U, std::get<1>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
.set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution")
@@ -741,7 +746,7 @@ private:
SubStream i_b2(i_b);
i_b2 << ConvolutionLayer(
1U, 3U, std::get<2>(b_filters),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
.set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution")
@@ -760,7 +765,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, std::get<0>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -774,7 +779,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
<< ConvolutionLayer(
3U, 3U, std::get<1>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 1))
.set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -790,7 +795,7 @@ private:
SubStream i_c1(i_c);
i_c1 << ConvolutionLayer(
3U, 1U, std::get<2>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
.set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution")
@@ -806,7 +811,7 @@ private:
SubStream i_c2(i_c);
i_c2 << ConvolutionLayer(
1U, 3U, std::get<3>(c_filters),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
.set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution")
@@ -826,7 +831,7 @@ private:
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
<< ConvolutionLayer(
1U, 1U, d_filt,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
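The graph_inception_v3.cpp hunks above all apply one pattern: the input descriptor is built by permuting a canonical NCHW shape into whatever layout was requested at run time, and every weights accessor is told the layout the .npy files were trained in so the loader can reorder them at import. A minimal sketch of that pattern, reusing only names visible in this diff (permute_shape, TensorDescriptor::set_layout, and the extended get_weights_accessor); it is an illustration, not part of the patch:

    // Input shape is authored in NCHW and permuted to the requested execution layout.
    const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U),
                                                   DataLayout::NCHW, common_params.data_layout);
    TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type)
                                        .set_layout(common_params.data_layout);

    // Weights on disk keep their trained layout (NCHW here); passing it to the
    // accessor lets the graph permute them to match the execution layout.
    const DataLayout weights_layout = DataLayout::NCHW;

    graph << InputLayer(input_descriptor,
                        get_input_accessor(common_params, std::move(preprocessor), false))
          << ConvolutionLayer(3U, 3U, 32U,
                              get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout),
                              std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                              PadStrideInfo(2, 2, 0, 0));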
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index 4e405923fc..b61acfcb3f 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -71,13 +70,19 @@ public:
// Create a preprocessor object
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor), false))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false))
// Conv2d_1a_3x3
<< ConvolutionLayer(3U, 3U, 32U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -87,7 +92,7 @@ public:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
// Conv2d_2a_3x3
<< ConvolutionLayer(3U, 3U, 32U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"),
@@ -97,7 +102,7 @@ public:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
// Conv2d_2b_3x3
<< ConvolutionLayer(3U, 3U, 64U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
<< BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"),
@@ -106,35 +111,35 @@ public:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_mixed_3a(data_path);
- graph << get_mixed_4a(data_path);
- graph << get_mixed_5a(data_path);
+ graph << get_mixed_3a(data_path, weights_layout);
+ graph << get_mixed_4a(data_path, weights_layout);
+ graph << get_mixed_5a(data_path, weights_layout);
// 4 inception A blocks
- graph << get_inceptionA_block(data_path, "Mixed_5b");
- graph << get_inceptionA_block(data_path, "Mixed_5c");
- graph << get_inceptionA_block(data_path, "Mixed_5d");
- graph << get_inceptionA_block(data_path, "Mixed_5e");
+ graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5b");
+ graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5c");
+ graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5d");
+ graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5e");
// reduction A block
- graph << get_reductionA_block(data_path);
+ graph << get_reductionA_block(data_path, weights_layout);
// 7 inception B blocks
- graph << get_inceptionB_block(data_path, "Mixed_6b");
- graph << get_inceptionB_block(data_path, "Mixed_6c");
- graph << get_inceptionB_block(data_path, "Mixed_6d");
- graph << get_inceptionB_block(data_path, "Mixed_6e");
- graph << get_inceptionB_block(data_path, "Mixed_6f");
- graph << get_inceptionB_block(data_path, "Mixed_6g");
- graph << get_inceptionB_block(data_path, "Mixed_6h");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6b");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6c");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6d");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6e");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6f");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6g");
+ graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6h");
// reduction B block
- graph << get_reductionB_block(data_path);
+ graph << get_reductionB_block(data_path, weights_layout);
// 3 inception C blocks
- graph << get_inceptionC_block(data_path, "Mixed_7b");
- graph << get_inceptionC_block(data_path, "Mixed_7c");
- graph << get_inceptionC_block(data_path, "Mixed_7d");
+ graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7b");
+ graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7c");
+ graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7d");
graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
<< FlattenLayer()
<< FullyConnectedLayer(
1001U,
- get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy"))
<< SoftmaxLayer()
<< OutputLayer(get_output_accessor(common_params, 5));
@@ -162,7 +167,7 @@ private:
Stream graph;
private:
- BranchLayer get_mixed_3a(const std::string &data_path)
+ BranchLayer get_mixed_3a(const std::string &data_path, DataLayout weights_layout)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_";
@@ -171,7 +176,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
@@ -183,13 +188,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}
- BranchLayer get_mixed_4a(const std::string &data_path)
+ BranchLayer get_mixed_4a(const std::string &data_path, DataLayout weights_layout)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_";
SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -198,7 +203,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -209,7 +214,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -218,7 +223,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(7U, 1U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -227,7 +232,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(1U, 7U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -236,7 +241,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -248,13 +253,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}
- BranchLayer get_mixed_5a(const std::string &data_path)
+ BranchLayer get_mixed_5a(const std::string &data_path, DataLayout weights_layout)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_";
SubStream i_a(graph);
i_a << ConvolutionLayer(3U, 3U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -269,13 +274,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}
- BranchLayer get_inceptionA_block(const std::string &data_path, std::string &&param_path)
+ BranchLayer get_inceptionA_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -286,7 +291,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -295,7 +300,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -306,7 +311,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(1U, 1U, 64U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -315,7 +320,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -324,7 +329,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"),
@@ -336,7 +341,7 @@ private:
SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 96U,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
@@ -348,13 +353,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
}
- BranchLayer get_reductionA_block(const std::string &data_path)
+ BranchLayer get_reductionA_block(const std::string &data_path, DataLayout weights_layout)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_";
SubStream i_a(graph);
i_a << ConvolutionLayer(3U, 3U, 384U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -365,7 +370,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -374,7 +379,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 224U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -383,7 +388,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -398,13 +403,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
- BranchLayer get_inceptionB_block(const std::string &data_path, std::string &&param_path)
+ BranchLayer get_inceptionB_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 384U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -415,7 +420,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -424,7 +429,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(7U, 1U, 224U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -433,7 +438,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(1U, 7U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -444,7 +449,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(1U, 1U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -453,7 +458,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(1U, 7U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"),
@@ -462,7 +467,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(7U, 1U, 224U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"),
@@ -471,7 +476,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(1U, 7U, 224U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"),
@@ -480,7 +485,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(7U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"),
@@ -492,7 +497,7 @@ private:
SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 128U,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
@@ -504,13 +509,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
}
- BranchLayer get_reductionB_block(const std::string &data_path)
+ BranchLayer get_reductionB_block(const std::string &data_path, DataLayout weights_layout)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_";
SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -519,7 +524,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 192U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -530,7 +535,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -539,7 +544,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(7U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -548,7 +553,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(1U, 7U, 320U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -557,7 +562,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(3U, 3U, 320U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -572,13 +577,13 @@ private:
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
- BranchLayer get_inceptionC_block(const std::string &data_path, std::string &&param_path)
+ BranchLayer get_inceptionC_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -590,7 +595,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, 384U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(
@@ -604,7 +609,7 @@ private:
SubStream i_b1(i_b);
i_b1 << ConvolutionLayer(
3U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
<< BatchNormalizationLayer(
@@ -618,7 +623,7 @@ private:
SubStream i_b2(i_b);
i_b2 << ConvolutionLayer(
1U, 3U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
<< BatchNormalizationLayer(
@@ -635,7 +640,7 @@ private:
SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, 384U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(
@@ -647,7 +652,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(
1U, 3U, 448U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
<< BatchNormalizationLayer(
@@ -659,7 +664,7 @@ private:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< ConvolutionLayer(
3U, 1U, 512U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
<< BatchNormalizationLayer(
@@ -673,7 +678,7 @@ private:
SubStream i_c1(i_c);
i_c1 << ConvolutionLayer(
3U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
<< BatchNormalizationLayer(
@@ -687,7 +692,7 @@ private:
SubStream i_c2(i_c);
i_c2 << ConvolutionLayer(
1U, 3U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
<< BatchNormalizationLayer(
@@ -704,7 +709,7 @@ private:
SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index a747b3cd11..d182844530 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -85,7 +85,7 @@ public:
// Set graph hints
graph << common_params.target
- << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method
+ << DepthwiseConvolutionMethod::Optimized3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method
<< common_params.fast_math_hint;
// Create core graph
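
This commit also renames the graph enumerators from upper case to CamelCase (OPTIMIZED_3x3 -> Optimized3x3, DIRECT -> Direct, DEFAULT -> Default, WINOGRAD -> Winograd, ADD/SUB/MUL -> Add/Sub/Mul). The hint itself is unchanged and still applies, as a stream manipulator, to every layer that follows it:

    graph << common_params.target
          << DepthwiseConvolutionMethod::Optimized3x3 // force the 3x3 fast path for the depthwise layers below
          << common_params.fast_math_hint;
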
diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp
index 58f36f6ae4..0ad719a2ca 100644
--- a/examples/graph_resnet50.cpp
+++ b/examples/graph_resnet50.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -72,13 +71,20 @@ public:
const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb,
false /* Do not convert to BGR */);
+
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */))
<< ConvolutionLayer(
7U, 7U, 64U,
- get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(2, 2, 3, 3))
.set_name("conv1/convolution")
@@ -92,15 +98,15 @@ public:
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu")
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool");
- add_residual_block(data_path, "block1", 64, 3, 2);
- add_residual_block(data_path, "block2", 128, 4, 2);
- add_residual_block(data_path, "block3", 256, 6, 2);
- add_residual_block(data_path, "block4", 512, 3, 1);
+ add_residual_block(data_path, "block1", weights_layout, 64, 3, 2);
+ add_residual_block(data_path, "block2", weights_layout, 128, 4, 2);
+ add_residual_block(data_path, "block3", weights_layout, 256, 6, 2);
+ add_residual_block(data_path, "block4", weights_layout, 512, 3, 1);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool5")
<< ConvolutionLayer(
1U, 1U, 1000U,
- get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"),
PadStrideInfo(1, 1, 0, 0))
.set_name("logits/convolution")
@@ -129,7 +135,8 @@ private:
CommonGraphParams common_params;
Stream graph;
- void add_residual_block(const std::string &data_path, const std::string &name, unsigned int base_depth, unsigned int num_units, unsigned int stride)
+ void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout,
+ unsigned int base_depth, unsigned int num_units, unsigned int stride)
{
for(unsigned int i = 0; i < num_units; ++i)
{
@@ -151,7 +158,7 @@ private:
SubStream right(graph);
right << ConvolutionLayer(
1U, 1U, base_depth,
- get_weights_accessor(data_path, unit_path + "conv1_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(unit_name + "conv1/convolution")
@@ -166,7 +173,7 @@ private:
<< ConvolutionLayer(
3U, 3U, base_depth,
- get_weights_accessor(data_path, unit_path + "conv2_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(middle_stride, middle_stride, 1, 1))
.set_name(unit_name + "conv2/convolution")
@@ -181,7 +188,7 @@ private:
<< ConvolutionLayer(
1U, 1U, base_depth * 4,
- get_weights_accessor(data_path, unit_path + "conv3_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(unit_name + "conv3/convolution")
@@ -198,7 +205,7 @@ private:
SubStream left(graph);
left << ConvolutionLayer(
1U, 1U, base_depth * 4,
- get_weights_accessor(data_path, unit_path + "shortcut_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 0))
.set_name(unit_name + "shortcut/convolution")
diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp
index c0e9b9f22a..e7ef013f17 100644
--- a/examples/graph_resnext50.cpp
+++ b/examples/graph_resnext50.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -68,26 +67,32 @@ public:
// Get trainable parameters data path
std::string data_path = common_params.data_path;
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params))
+ << InputLayer(input_descriptor, get_input_accessor(common_params))
<< ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"),
get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy"))
.set_name("bn_data/Scale")
<< ConvolutionLayer(
7U, 7U, 64U,
- get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy"),
+ get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"),
PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR))
.set_name("conv0/Convolution")
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu")
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0");
- add_residual_block(data_path, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1);
- add_residual_block(data_path, 512, 2, 4, 2);
- add_residual_block(data_path, 1024, 3, 6, 2);
- add_residual_block(data_path, 2048, 4, 3, 2);
+ add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1);
+ add_residual_block(data_path, weights_layout, 512, 2, 4, 2);
+ add_residual_block(data_path, weights_layout, 1024, 3, 6, 2);
+ add_residual_block(data_path, weights_layout, 2048, 4, 3, 2);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool1")
<< FlattenLayer().set_name("predictions/Reshape")
@@ -116,7 +121,8 @@ private:
CommonGraphParams common_params;
Stream graph;
- void add_residual_block(const std::string &data_path, unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1)
+ void add_residual_block(const std::string &data_path, DataLayout weights_layout,
+ unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1)
{
for(unsigned int i = 0; i < num_units; ++i)
{
@@ -137,7 +143,7 @@ private:
SubStream right(graph);
right << ConvolutionLayer(
1U, 1U, base_depth / 2,
- get_weights_accessor(data_path, unit_path + "conv1_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout),
get_weights_accessor(data_path, unit_path + "conv1_biases.npy"),
PadStrideInfo(1, 1, 0, 0))
.set_name(unit_name + "conv1/convolution")
@@ -145,7 +151,7 @@ private:
<< ConvolutionLayer(
3U, 3U, base_depth / 2,
- get_weights_accessor(data_path, unit_path + "conv2_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
pad_grouped_conv, 32)
.set_name(unit_name + "conv2/convolution")
@@ -156,7 +162,7 @@ private:
<< ConvolutionLayer(
1U, 1U, base_depth,
- get_weights_accessor(data_path, unit_path + "conv3_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout),
get_weights_accessor(data_path, unit_path + "conv3_biases.npy"),
PadStrideInfo(1, 1, 0, 0))
.set_name(unit_name + "conv3/convolution");
@@ -166,7 +172,7 @@ private:
{
left << ConvolutionLayer(
1U, 1U, base_depth,
- get_weights_accessor(data_path, unit_path + "sc_weights.npy"),
+ get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0))
.set_name(unit_name + "sc/convolution")
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index 9439ab4343..b539a9bc34 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -72,78 +71,84 @@ public:
const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor)))
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
<< ConvolutionLayer(
7U, 7U, 96U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"),
PadStrideInfo(2, 2, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, 16U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire2", 64U, 64U);
+ graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U);
graph << ConvolutionLayer(
1U, 1U, 16U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire3", 64U, 64U);
+ graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U);
graph << ConvolutionLayer(
1U, 1U, 32U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire4", 128U, 128U);
+ graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, 32U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire5", 128U, 128U);
+ graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U);
graph << ConvolutionLayer(
1U, 1U, 48U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire6", 192U, 192U);
+ graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U);
graph << ConvolutionLayer(
1U, 1U, 48U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire7", 192U, 192U);
+ graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U);
graph << ConvolutionLayer(
1U, 1U, 64U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire8", 256U, 256U);
+ graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, 64U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire9", 256U, 256U);
+ graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U);
graph << ConvolutionLayer(
1U, 1U, 1000U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
@@ -174,13 +179,14 @@ private:
CommonGraphParams common_params;
Stream graph;
- BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, unsigned int expand1_filt, unsigned int expand3_filt)
+ BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+ unsigned int expand1_filt, unsigned int expand3_filt)
{
std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, expand1_filt,
- get_weights_accessor(data_path, total_path + "expand1x1_w.npy"),
+ get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -188,7 +194,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
3U, 3U, expand3_filt,
- get_weights_accessor(data_path, total_path + "expand3x3_w.npy"),
+ get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
PadStrideInfo(1, 1, 1, 1))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
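
The fire modules follow the branch-and-merge idiom used across these examples: two SubStream forks off the main Stream, concatenated back along the channel axis. A trimmed sketch with illustrative filter counts:

    SubStream i_a(graph); // 1x1 expand branch
    i_a << ConvolutionLayer(1U, 1U, 64U,
                            get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
                            get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
                            PadStrideInfo(1, 1, 0, 0))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    SubStream i_b(graph); // 3x3 expand branch
    i_b << ConvolutionLayer(3U, 3U, 64U,
                            get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
                            get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
                            PadStrideInfo(1, 1, 1, 1))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
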
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index ba7ee774a7..c0b5ff212d 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
@@ -72,80 +71,86 @@ public:
const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+ // Create input descriptor
+ const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+ TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+ // Set weights trained layout
+ const DataLayout weights_layout = DataLayout::NCHW;
+
graph << common_params.target
<< common_params.fast_math_hint
- << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), common_params.data_type),
- get_input_accessor(common_params, std::move(preprocessor)))
- << ConvolutionMethod::DIRECT
+ << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
+ << ConvolutionMethod::Direct
<< ConvolutionLayer(
3U, 3U, 64U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"),
PadStrideInfo(2, 2, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
- << ConvolutionMethod::DEFAULT
+ << ConvolutionMethod::Default
<< ConvolutionLayer(
1U, 1U, 16U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire2", 64U, 64U);
+ graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U);
graph << ConvolutionLayer(
1U, 1U, 16U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire3", 64U, 64U);
+ graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, 32U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire4", 128U, 128U);
+ graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U);
graph << ConvolutionLayer(
1U, 1U, 32U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire5", 128U, 128U);
+ graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U);
graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
<< ConvolutionLayer(
1U, 1U, 48U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire6", 192U, 192U);
+ graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U);
graph << ConvolutionLayer(
1U, 1U, 48U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire7", 192U, 192U);
+ graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U);
graph << ConvolutionLayer(
1U, 1U, 64U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire8", 256U, 256U);
+ graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U);
graph << ConvolutionLayer(
1U, 1U, 64U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- graph << get_expand_fire_node(data_path, "fire9", 256U, 256U);
+ graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U);
graph << ConvolutionLayer(
1U, 1U, 1000U,
- get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout),
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
@@ -176,13 +181,14 @@ private:
CommonGraphParams common_params;
Stream graph;
- BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, unsigned int expand1_filt, unsigned int expand3_filt)
+ BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+ unsigned int expand1_filt, unsigned int expand3_filt)
{
std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_";
SubStream i_a(graph);
i_a << ConvolutionLayer(
1U, 1U, expand1_filt,
- get_weights_accessor(data_path, total_path + "expand1x1_w.npy"),
+ get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
PadStrideInfo(1, 1, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -190,7 +196,7 @@ private:
SubStream i_b(graph);
i_b << ConvolutionLayer(
3U, 3U, expand3_filt,
- get_weights_accessor(data_path, total_path + "expand3x3_w.npy"),
+ get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
PadStrideInfo(1, 1, 1, 1))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
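
SqueezeNet v1.1 is the one example that toggles the convolution method mid-stream: Direct is requested for the opening 3x3 convolution, then the hint is reset so later layers choose their method as usual:

    graph << ConvolutionMethod::Direct // applies to the layers that follow...
          << ConvolutionLayer(3U, 3U, 64U,
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout),
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"),
                              PadStrideInfo(2, 2, 0, 0))
          << ConvolutionMethod::Default; // ...until the hint is reset here
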
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index c38a78ce3e..080835348d 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -549,10 +549,10 @@ __kernel void pooling_layer_MxN_nhwc(
for(int y = 0; y < POOL_SIZE_Y; ++y)
{
- int y1 = select(y, PAD_Y - idx_height, y + idx_height < PAD_Y || y + idx_height > MAX_HEIGHT);
+ int y1 = select(y, PAD_Y - idx_height, y + idx_height - PAD_Y < 0 || y + idx_height - PAD_Y >= MAX_HEIGHT);
for(int x = 0; x < POOL_SIZE_X; ++x)
{
- int x1 = select(x, PAD_X - idx_width - 1, x + idx_width < PAD_X || x + idx_width > MAX_WIDTH);
+ int x1 = select(x, PAD_X - idx_width - 1, x + idx_width - PAD_X < 0 || x + idx_width - PAD_X >= MAX_WIDTH);
x1 = select(x1, PAD_X - idx_width - 1, y != y1);
VEC_DATA_TYPE(DATA_TYPE, 8)
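
The pooling fix is easiest to see with the bounds written out: the input row addressed by the kernel is y + idx_height - PAD_Y, valid while 0 <= row < MAX_HEIGHT. The old predicate compared y + idx_height against MAX_HEIGHT directly, which is only correct for a pad of 1; with no padding it read one row past the end. A standalone C++ sketch of the two predicates:

    #include <cassert>

    // Old test: ignores the pad offset on the upper bound and uses '>'.
    bool out_of_bounds_old(int y, int idx, int pad, int max)
    {
        return y + idx < pad || y + idx > max;
    }

    // New test: clamp exactly when the un-padded row leaves [0, max).
    bool out_of_bounds_new(int y, int idx, int pad, int max)
    {
        const int row = y + idx - pad;
        return row < 0 || row >= max;
    }

    int main()
    {
        // PAD = 0, MAX = 4: row 4 is one past the last valid row (3).
        assert(!out_of_bounds_old(3, 1, 0, 4)); // old test misses it: 4 > 4 is false
        assert(out_of_bounds_new(3, 1, 0, 4));  // new test clamps it: row 4 >= 4
        return 0;
    }
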
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index b1290b8edd..a09129bba6 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -288,7 +288,6 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
- ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_layout() != DataLayout::NCHW, "Special case Im2Col output layout is NCHW");
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation));
_input = input;
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index df01eab240..edc9e9d58c 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -42,6 +42,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC && norm_info.type() == NormType::IN_MAP_2D,
+ "Only Cross-map and 1D In-map normalization is supported for NHWC layout");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
// Checks performed when output is configured
@@ -59,14 +61,15 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*output, *input->clone());
- const unsigned int norm_size = norm_info.norm_size();
- bool is_in_map = norm_info.is_in_map();
+ const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info);
+ const unsigned int norm_size = norm_info.norm_size();
+ bool is_norm_across_width = norm_idx == 0;
- const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
+ const unsigned int border_width = is_norm_across_width ? std::min(norm_size / 2, 3U) : 0;
const BorderSize border_size = BorderSize(0, border_width);
const unsigned int num_elems_processed_per_iteration = 4;
- const unsigned int num_elems_read_per_iteration = is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2)) : num_elems_processed_per_iteration;
+ const unsigned int num_elems_read_per_iteration = is_norm_across_width ? (num_elems_processed_per_iteration + 2 * (norm_size / 2)) : num_elems_processed_per_iteration;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
@@ -84,7 +87,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLNormalizationLayerKernel::CLNormalizationLayerKernel()
- : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
+ : _input(nullptr), _output(nullptr), _border_size(0), _is_norm_across_width(false)
{
}
@@ -106,8 +109,9 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
_input = input;
_output = output;
- _is_in_map = norm_info.is_in_map();
- const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
+ const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info);
+ _is_norm_across_width = norm_idx == 0;
+ const unsigned int border_width = _is_norm_across_width ? std::min(norm_info.norm_size() / 2, 3U) : 0;
_border_size = BorderSize(0, border_width);
const unsigned int num_elems_processed_per_iteration = 4;
@@ -125,7 +129,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
// Create kernel
- std::string kernel_name = _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
+ std::string kernel_name = _is_norm_across_width ? "normalization_layer_in_map" : "normalization_layer_cross_map";
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel window
@@ -159,7 +163,7 @@ void CLNormalizationLayerKernel::run(const Window &window, cl::CommandQueue &que
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
+ const int collapsed_dimension = _is_norm_across_width ? Window::DimZ : 4;
Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
Window slice = window_collapsed.first_slice_window_3D();
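
The kernel now keys off the index of the normalized dimension instead of the NormType alone, since the same NormType lands on different dimensions in NCHW and NHWC. A sketch of the helper's plausible behaviour (not the library source):

    // IN_MAP normalizes along the width axis, CROSS_MAP along the channel axis:
    //                NCHW (W,H,C,N)   NHWC (C,W,H,N)
    //   IN_MAP_1D/2D       0                1
    //   CROSS_MAP          2                0
    unsigned int norm_dim_index(DataLayout layout, const NormalizationLayerInfo &info)
    {
        const DataLayoutDimension dim = info.is_in_map() ? DataLayoutDimension::WIDTH
                                                         : DataLayoutDimension::CHANNEL;
        return get_data_layout_dimension_index(layout, dim);
    }

Index 0 means "normalize across width" as far as the kernel is concerned: it selects the in-map kernel variant and a horizontal border. For NHWC this is also where CROSS_MAP lands, so cross-map normalization in NHWC reuses the dimension-0 path.
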
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 246ab68130..d5ea092c78 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -154,7 +154,9 @@ std::tuple<Status, Window, CLPoolingConfig> validate_and_configure_window(ITenso
num_elems_processed_per_iteration = 8;
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowRectangle input_access(input, 0, -pool_pad_left, num_elems_processed_per_iteration, pool_size_x);
+ AccessWindowStatic input_access(input,
+ 0, -1,
+ ceil_to_multiple(input->dimension(0), num_elems_processed_per_iteration), input->dimension(1));
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
window_changed = update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index cb1996f33e..15e8298e2d 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -43,6 +43,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squ
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC && norm_info.type() == NormType::IN_MAP_2D,
+ "Only Cross-map and 1D In-map normalization is supported for NHWC layout");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
@@ -61,8 +63,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
+ const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info);
const unsigned int num_rows = (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1;
- const unsigned int border_width = (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
+ const unsigned int border_width = (norm_idx == 2) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
BorderSize border_size = BorderSize(0, border_width);
bool window_changed = false;
@@ -107,7 +110,8 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));
- const unsigned int border_width = (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
+ const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info);
+ const unsigned int border_width = (norm_idx == 2) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
_input = input;
_input_squared = input_squared;
@@ -119,16 +123,21 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
{
case DataType::F32:
{
- switch(norm_info.type())
+ switch(norm_idx)
{
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 0, true>;
+ case 0:
+ {
+ if(norm_info.type() == NormType::IN_MAP_2D)
+ {
+ _func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 0, true>;
+ }
+ else
+ {
+ _func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 0, false>;
+ }
break;
- case NormType::CROSS_MAP:
+ }
+ case 2:
_func = &NENormalizationLayerKernel::normalize_float<DataType::F32, 2, false>;
break;
default:
@@ -138,16 +147,21 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
}
case DataType::F16:
{
- switch(norm_info.type())
+ switch(norm_idx)
{
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 0, true>;
+ case 0:
+ {
+ if(norm_info.type() == NormType::IN_MAP_2D)
+ {
+ _func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 0, true>;
+ }
+ else
+ {
+ _func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 0, false>;
+ }
break;
- case NormType::CROSS_MAP:
+ }
+ case 2:
_func = &NENormalizationLayerKernel::normalize_float<DataType::F16, 2, false>;
break;
default:
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index d26039ec35..b3721719d9 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -88,10 +88,14 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPai
bool has_bias = (bias != EmptyNodeID);
// Split input
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, 2);
+ const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
// Split weights
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, 3);
+ const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
// Split bias
NodeID bias_split = EmptyNodeID;
@@ -122,7 +126,7 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPai
}
// Depth concatenate output
- return GraphBuilder::add_depth_concatenate_node(g, params, convolution_outputs);
+ return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
}
} // namespace
@@ -329,11 +333,11 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx
return deconv_nid;
}
-NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs)
+NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis)
{
ARM_COMPUTE_ERROR_ON(inputs.size() == 0);
- NodeID nid = g.add_node<DepthConcatenateLayerNode>(inputs.size());
+ NodeID nid = g.add_node<ConcatenateLayerNode>(inputs.size(), axis);
unsigned int i = 0;
for(const auto &input : inputs)
@@ -508,9 +512,9 @@ NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdx
NodeIdxPair add_const_nidxp = { add_const_nid, 0 };
// Create node and connect
- NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::MUL);
+ NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul);
NodeIdxPair mulnode_nidxp = { mul_node, 0 };
- NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::ADD);
+ NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add);
return add_node;
}
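
The grouped-convolution builder no longer hard-codes split axes 2 and 3; it resolves them from the tensor descriptors, which makes the whole split/convolve/concatenate sequence layout-agnostic. A sketch of the index resolution, assuming get_dimension_idx() maps a logical DataLayoutDimension onto the descriptor's layout:

    // CHANNEL resolves to index 2 in NCHW (W,H,C,N) and to index 0 in NHWC (C,W,H,N);
    // BATCHES is index 3 in either layout, which is where per-group weight blocks live.
    const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
    const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);

    // Re-join the per-group outputs along channels, whatever index that resolves to:
    NodeID out = GraphBuilder::add_concatenate_node(g, params, convolution_outputs,
                                                    DataLayoutDimension::CHANNEL);
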
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 4d6734846a..57871487ef 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -89,8 +89,8 @@ std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &
return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::DeconvolutionLayer:
return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return detail::create_depth_concatenate_layer<CLDepthConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index e6bd5a5f02..f72513c87c 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -68,6 +68,42 @@ struct GCEltwiseFunctions
namespace detail
{
+// Specialize functions
+template <>
+std::unique_ptr<IFunction> create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(ConcatenateLayerNode &node)
+{
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
+ ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+
+ // Return nullptr if depth concatenate is switched off
+ if(!node.is_enabled())
+ {
+ return nullptr;
+ }
+
+ // Extract IO and info
+ std::vector<GCTargetInfo::TensorType *> inputs;
+ for(unsigned int i = 0; i < node.num_inputs(); ++i)
+ {
+ inputs.push_back(get_backing_tensor<GCTargetInfo>(node.input(i)));
+ }
+ typename GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
+
+ // Create and configure function
+ auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
+ func->configure(inputs, output);
+
+ // Log info
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+ << " Target " << GCTargetInfo::TargetType
+ << " Data Type: " << output->info()->data_type()
+ << " Shape: " << output->info()->tensor_shape()
+ << " Num Inputs: " << inputs.size()
+ << std::endl);
+
+ return std::move(func);
+}
+
template <>
std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx)
{
@@ -92,7 +128,7 @@ std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions,
std::unique_ptr<IFunction> func;
std::string func_name;
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
std::tie(func, func_name) = create_named_function<GCConvolutionLayerFunctions::DirectConvolutionLayer>(
std::string("DirectConvolutionLayer"),
@@ -139,7 +175,7 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer<GCDepthwiseConvolu
// Create and configure function (we assume that functions have been validated before creation)
std::unique_ptr<IFunction> func;
std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
+ if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
{
std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
std::string("DepthwiseConvolutionLayer3x3"),
@@ -183,17 +219,17 @@ std::unique_ptr<IFunction> create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo
std::unique_ptr<IFunction> func = nullptr;
std::string func_name;
- if(eltwise_op == EltwiseOperation::ADD)
+ if(eltwise_op == EltwiseOperation::Add)
{
std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Addition>(
std::string("GCArithmeticAddition"),
input1, input2, output, convert_policy);
}
- else if(eltwise_op == EltwiseOperation::SUB)
+ else if(eltwise_op == EltwiseOperation::Sub)
{
ARM_COMPUTE_ERROR("Arithmetic subtraction is not supported in GLES backend");
}
- else if(eltwise_op == EltwiseOperation::MUL)
+ else if(eltwise_op == EltwiseOperation::Mul)
{
std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Multiplication>(
std::string("PixelWiseMultiplication"),
@@ -232,8 +268,8 @@ std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext &
return detail::create_batch_normalization_layer<GCBatchNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::ConvolutionLayer:
return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return detail::create_depth_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index 4bef89329a..8118a7c476 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -58,7 +58,7 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
// TODO (geopin01) : Switch when validation is implemented
// Validate function
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution");
- node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::OPTIMIZED_3x3);
+ node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Optimized3x3);
return Status{};
}
@@ -80,14 +80,14 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
const ConvolutionMethod conv_algorithm = node.convolution_method();
// Validate function
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y();
bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
if(!(is_square && is_direct && is_correct_stride))
{
- node.set_convolution_method(ConvolutionMethod::DEFAULT);
+ node.set_convolution_method(ConvolutionMethod::Default);
}
}
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 3b7417da3f..6c912a02f1 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -102,7 +102,7 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::NEON);
std::unique_ptr<IFunction> func;
std::string func_name;
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(
std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info);
@@ -112,7 +112,7 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(
std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info);
}
- else if(conv_algorithm == ConvolutionMethod::WINOGRAD)
+ else if(conv_algorithm == ConvolutionMethod::Winograd)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(
std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
@@ -183,8 +183,8 @@ std::unique_ptr<IFunction> NEFunctionFactory::create(INode *node, GraphContext &
return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::DeconvolutionLayer:
return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return detail::create_depth_concatenate_layer<NEDepthConcatenateLayer, NETargetInfo>(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index c56f4c5106..241c07b367 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -25,8 +25,9 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/Iterable.h"
@@ -45,11 +46,18 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
// Should be in reverse order of execution
for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
{
- if(node && node->type() == NodeType::DepthConcatenateLayer && node->output(0) != nullptr)
+ if(node && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
+ // Check concatenation axis (sub-tensor optimization is supported only for concatenation axis >= 2)
+ auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
+ if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2)
+ {
+ continue;
+ }
+
// Check that all tensors have the same target and valid inputs
bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
[&](const EdgeID & eid)
@@ -76,7 +84,7 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
depth += input_shape.z();
}
- auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<DepthConcatenateLayerNode *>(node.get());
+ auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
dc_node->set_enabled(false);
}
}
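
The new guard makes the sub-tensor optimization explicitly axis-aware: it only pays off when the concatenated axis resolves to dimension index 2 or higher, i.e. channels in NCHW. A sketch of why, assuming dimension 0 is the fastest-moving index:

    // With NCHW shape (W, H, C, N) the element offset is roughly
    //   off = x + W*y + W*H*c + W*H*C*n
    // so a channel range [c0, c1) is one contiguous block per batch and a
    // sub-tensor can simply alias it. With NHWC shape (C, W, H, N) the channel
    // is the fastest-moving index, the inputs interleave, and no contiguous
    // view exists -- the mutator now skips those nodes rather than mis-optimizing.
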
diff --git a/src/graph/nodes/DepthConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp
index 08cccc1ff1..ade3f6e1a9 100644
--- a/src/graph/nodes/DepthConcatenateLayerNode.cpp
+++ b/src/graph/nodes/ConcatenateLayerNode.cpp
@@ -21,58 +21,74 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
namespace arm_compute
{
namespace graph
{
-DepthConcatenateLayerNode::DepthConcatenateLayerNode(unsigned int total_nodes)
- : _total_nodes(total_nodes), _is_enabled(true)
+ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis)
+ : _total_nodes(total_nodes), _axis(axis), _is_enabled(true)
{
_input_edges.resize(_total_nodes, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
}
-void DepthConcatenateLayerNode::set_enabled(bool is_enabled)
+void ConcatenateLayerNode::set_enabled(bool is_enabled)
{
_is_enabled = is_enabled;
}
-bool DepthConcatenateLayerNode::is_enabled() const
+bool ConcatenateLayerNode::is_enabled() const
{
return _is_enabled;
}
-TensorDescriptor DepthConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors)
+DataLayoutDimension ConcatenateLayerNode::concatenation_axis() const
+{
+ return _axis;
+}
+
+TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors,
+ DataLayoutDimension axis)
{
ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0);
TensorDescriptor output_descriptor = input_descriptors[0];
+ const int axis_idx = get_dimension_idx(output_descriptor, axis);
- size_t max_x = 0;
- size_t max_y = 0;
- size_t depth = 0;
-
- for(const auto &input_descriptor : input_descriptors)
+ // Extract shapes
+ std::vector<const TensorShape *> shapes;
+ for(auto &input_descriptor : input_descriptors)
{
- max_x = std::max(input_descriptor.shape.x(), max_x);
- max_y = std::max(input_descriptor.shape.y(), max_y);
- depth += input_descriptor.shape.z();
+ shapes.emplace_back(&input_descriptor.shape);
}
- output_descriptor.shape.set(0, max_x);
- output_descriptor.shape.set(1, max_y);
- output_descriptor.shape.set(2, depth);
+ // Calculate output shape
+ if(axis_idx == 0)
+ {
+ output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(shapes);
+ }
+ else if(axis_idx == 2)
+ {
+ output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_depth_concatenate_shape(shapes);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported concatenation axis!");
+ }
return output_descriptor;
}
-bool DepthConcatenateLayerNode::forward_descriptors()
+bool ConcatenateLayerNode::forward_descriptors()
{
if(_outputs[0] != NullTensorID)
{
@@ -84,7 +100,7 @@ bool DepthConcatenateLayerNode::forward_descriptors()
return false;
}
-TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const
+TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const
{
ARM_COMPUTE_UNUSED(idx);
ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
@@ -106,18 +122,18 @@ TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(t == nullptr);
inputs_descriptors.push_back(t->desc());
}
- output_info = compute_output_descriptor(inputs_descriptors);
+ output_info = compute_output_descriptor(inputs_descriptors, _axis);
}
return output_info;
}
-NodeType DepthConcatenateLayerNode::type() const
+NodeType ConcatenateLayerNode::type() const
{
- return NodeType::DepthConcatenateLayer;
+ return NodeType::ConcatenateLayer;
}
-void DepthConcatenateLayerNode::accept(INodeVisitor &v)
+void ConcatenateLayerNode::accept(INodeVisitor &v)
{
v.visit(*this);
}
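The rewritten compute_output_descriptor() defers the shape math to the shape_calculator helpers. A minimal sketch of the assumed depth-concatenation semantics follows, mirroring what the removed hand-rolled loop computed; the Shape struct here is a stand-in for arm_compute::TensorShape, not the library's type.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative stand-in for arm_compute::TensorShape (x = width, y = height, z = depth).
struct Shape { std::size_t x, y, z; };

// Assumed semantics of shape_calculator::calculate_depth_concatenate_shape():
// keep the largest width/height among the inputs and accumulate depth.
Shape depth_concat_shape(const std::vector<Shape> &in)
{
    Shape out{ 0, 0, 0 };
    for(const auto &s : in)
    {
        out.x = std::max(out.x, s.x);
        out.y = std::max(out.y, s.y);
        out.z += s.z;
    }
    return out;
}

int main()
{
    // Two feature maps concatenated along depth: 64 + 128 channels.
    const Shape out = depth_concat_shape({ { 56, 56, 64 }, { 56, 56, 128 } });
    assert(out.x == 56 && out.y == 56 && out.z == 192);
    return 0;
}

Width concatenation (axis_idx == 0) is assumed to be the analogous computation with the roles of x and z swapped; any other axis trips the ARM_COMPUTE_ERROR above.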
diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp
index 61cf42356f..ef156ea252 100644
--- a/src/graph/printers/DotGraphPrinter.cpp
+++ b/src/graph/printers/DotGraphPrinter.cpp
@@ -47,17 +47,19 @@ void DotGraphVisitor::visit(BatchNormalizationLayerNode &n)
_info = ss.str();
}
-void DotGraphVisitor::visit(ConvolutionLayerNode &n)
+void DotGraphVisitor::visit(ConcatenateLayerNode &n)
{
std::stringstream ss;
- ss << n.convolution_method();
+ ss << "Enabled: " << n.is_enabled();
+ ss << R"( \n )";
+ ss << "Axis: " << n.concatenation_axis();
_info = ss.str();
}
-void DotGraphVisitor::visit(DepthConcatenateLayerNode &n)
+void DotGraphVisitor::visit(ConvolutionLayerNode &n)
{
std::stringstream ss;
- ss << "Enabled: " << n.is_enabled();
+ ss << n.convolution_method();
_info = ss.str();
}
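The reordered visitor now labels concatenation nodes with their enabled state and axis. A small sketch of the label string it should produce for an enabled channel-axis node (hypothetical values, reproducing the streaming logic above):

#include <iostream>
#include <sstream>

int main()
{
    // Reproduces the visitor's streaming for is_enabled() == true, axis == CHANNEL.
    std::stringstream ss;
    ss << "Enabled: " << true;
    ss << R"( \n )"; // literal "\n" so dot renders a multi-line label
    ss << "Axis: " << "CHANNEL";
    std::cout << ss.str() << std::endl; // prints: Enabled: 1 \n Axis: CHANNEL
    return 0;
}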
diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp
index 7ac7bbce2b..2e5d787448 100644
--- a/utils/CommonGraphOptions.cpp
+++ b/utils/CommonGraphOptions.cpp
@@ -84,7 +84,7 @@ namespace utils
os << "Data layout : " << common_params.data_layout << std::endl;
os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl;
os << "Tuner file : " << common_params.tuner_file << std::endl;
- os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::ENABLED ? true_str : false_str) << std::endl;
+ os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl;
if(!common_params.data_path.empty())
{
os << "Data path : " << common_params.data_path << std::endl;
@@ -168,7 +168,7 @@ CommonGraphOptions::CommonGraphOptions(CommandLineParser &parser)
CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options)
{
- FastMathHint fast_math_hint_value = options.fast_math_hint->value() ? FastMathHint::ENABLED : FastMathHint::DISABLED;
+ FastMathHint fast_math_hint_value = options.fast_math_hint->value() ? FastMathHint::Enabled : FastMathHint::Disabled;
auto validation_range = parse_validation_range(options.validation_range->value());
CommonGraphParams common_params;
@@ -178,7 +178,7 @@ CommonGraphParams consume_common_graph_parameters(CommonGraphOptions &options)
common_params.data_type = options.data_type->value();
common_params.data_layout = options.data_layout->value();
common_params.enable_tuner = options.enable_tuner->is_set() ? options.enable_tuner->value() : false;
- common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::DISABLED;
+ common_params.fast_math_hint = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled;
common_params.data_path = options.data_path->value();
common_params.image = options.image->value();
common_params.labels = options.labels->value();
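With the enumerators renamed from ENABLED/DISABLED to Enabled/Disabled, call sites compare against the new spellings. A minimal stand-alone sketch of the consume-time selection shown above; the enum here is a stand-in for arm_compute::graph::FastMathHint.

#include <iostream>

// Stand-in for arm_compute::graph::FastMathHint, illustration only.
enum class FastMathHint { Enabled, Disabled };

int main()
{
    const bool flag_set = true; // e.g. whether the fast-math option was given
    const FastMathHint hint = flag_set ? FastMathHint::Enabled : FastMathHint::Disabled;
    std::cout << "Fast math enabled? : "
              << (hint == FastMathHint::Enabled ? "true" : "false") << std::endl;
    return 0;
}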
diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h
index ce4a2c9dd1..23c3cc7c30 100644
--- a/utils/CommonGraphOptions.h
+++ b/utils/CommonGraphOptions.h
@@ -93,7 +93,7 @@ struct CommonGraphParams
arm_compute::DataType data_type{ DataType::F32 };
arm_compute::DataLayout data_layout{ DataLayout::NCHW };
bool enable_tuner{ false };
- arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::DISABLED };
+ arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled };
std::string data_path{};
std::string image{};
std::string labels{};
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index eb03ff9b96..8bfdba9725 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -475,6 +475,35 @@ inline std::string to_string(const arm_compute::DataLayout &data_layout)
return str.str();
}
+/** Formatted output of the DataLayoutDimension type.
+ *
+ * @param[out] os Output stream.
+ * @param[in] data_layout_dim Data layout dimension to print.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const DataLayoutDimension &data_layout_dim)
+{
+ switch(data_layout_dim)
+ {
+ case DataLayoutDimension::WIDTH:
+ os << "WIDTH";
+ break;
+ case DataLayoutDimension::HEIGHT:
+ os << "HEIGHT";
+ break;
+ case DataLayoutDimension::CHANNEL:
+ os << "CHANNEL";
+ break;
+ case DataLayoutDimension::BATCHES:
+ os << "BATCHES";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+ return os;
+}
+
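A short usage sketch for the new stream operator; the enum and overload below mirror the added TypePrinter.h code so the example compiles without the arm_compute headers.

#include <iostream>

// Stand-in enum and overload mirroring the new operator<< above.
enum class DataLayoutDimension { WIDTH, HEIGHT, CHANNEL, BATCHES };

std::ostream &operator<<(std::ostream &os, const DataLayoutDimension &dim)
{
    switch(dim)
    {
        case DataLayoutDimension::WIDTH:   os << "WIDTH";   break;
        case DataLayoutDimension::HEIGHT:  os << "HEIGHT";  break;
        case DataLayoutDimension::CHANNEL: os << "CHANNEL"; break;
        default:                           os << "BATCHES"; break;
    }
    return os;
}

int main()
{
    std::cout << DataLayoutDimension::CHANNEL << std::endl; // prints "CHANNEL"
    return 0;
}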
/** Formatted output of the DataType type.
*
* @param[out] os Output stream.