From 88d5b22eb5574d8b564474df2c758d222b3b5547 Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Fri, 6 Apr 2018 12:24:55 +0100 Subject: COMPMID-1035 - Add ResneXt50 as a graph example Change-Id: I42f0e7dab38e45b5eecfe6858eaecee8939c8585 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129291 Reviewed-by: Georgios Pinitas Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/utils/misc/Utility.h | 10 ++ arm_compute/graph/GraphBuilder.h | 14 ++ arm_compute/graph/Types.h | 1 + arm_compute/graph/frontend/Layers.h | 27 ++++ arm_compute/graph/nodes/EltwiseLayerNode.h | 20 ++- examples/graph_alexnet.cpp | 2 +- examples/graph_googlenet.cpp | 4 +- examples/graph_inception_v3.cpp | 2 +- examples/graph_lenet.cpp | 2 +- examples/graph_mobilenet.cpp | 2 +- examples/graph_resnet50.cpp | 6 +- examples/graph_resnext50.cpp | 208 +++++++++++++++++++++++++ examples/graph_squeezenet.cpp | 4 +- examples/graph_squeezenet_v1_1.cpp | 4 +- examples/graph_vgg16.cpp | 4 +- examples/graph_vgg19.cpp | 4 +- src/graph/GraphBuilder.cpp | 29 ++++ src/graph/backends/CL/CLFunctionsFactory.cpp | 17 +- src/graph/backends/GLES/GCFunctionsFactory.cpp | 11 +- src/graph/backends/NEON/NEFunctionFactory.cpp | 15 +- src/graph/nodes/EltwiseLayerNode.cpp | 17 +- utils/GraphUtils.cpp | 39 +++++ utils/GraphUtils.h | 64 +++++++- utils/Utils.h | 37 +++++ 24 files changed, 499 insertions(+), 44 deletions(-) create mode 100644 examples/graph_resnext50.cpp diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index 639f2e155d..f30a417a09 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -164,6 +164,16 @@ std::vector sort_indices(const std::vector &v) return idx; } + +inline bool endswith(const std::string &filename, const std::string &suffix) +{ + if(filename.size() < suffix.size()) + { + return false; + } + return std::equal(suffix.rbegin(), suffix.rend(), filename.rbegin()); +} + } // namespace utility } // 
namespace arm_compute #endif /* __ARM_COMPUTE_MISC_UTILITY_H__ */ diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h index aea28eb8d6..04edf673d1 100644 --- a/arm_compute/graph/GraphBuilder.h +++ b/arm_compute/graph/GraphBuilder.h @@ -213,6 +213,20 @@ public: * @return Node ID of the created node, EmptyNodeID in case of error */ static NodeID add_reshape_node(Graph &g, NodeParams params, NodeIdxPair input, TensorShape shape); + /** Adds a scale layer node to the graph + * This layer computes a product of the input with a scale (read from mul_accessor) and it applies an offset (read from add_accessor). + * output = input * mul_w + add_w + * + * @param[in] g Graph to add the layer to + * @param[in] params Common node parameters + * @param[in] input Input to the fully connected layer node as a NodeID-Index pair + * @param[in] mul_accessor (Optional) Accessor of the mul node data + * @param[in] add_accessor (Optional) Accessor of the add node data + * + * @return Node ID of the created node, EmptyNodeID in case of error + */ + static NodeID add_scale_layer(Graph &g, const NodeParams ¶ms, NodeIdxPair input, + ITensorAccessorUPtr mul_accessor = nullptr, ITensorAccessorUPtr add_accessor = nullptr); /** Adds a softmax node to the graph * * @param[in] g Graph to add the node to diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index a910610c7a..d4e4f99377 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -137,6 +137,7 @@ enum class NodeType NormalizationLayer, PoolingLayer, ReshapeLayer, + ScaleLayer, SoftmaxLayer, SplitLayer, diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h index d122a7a967..a97684453c 100644 --- a/arm_compute/graph/frontend/Layers.h +++ b/arm_compute/graph/frontend/Layers.h @@ -380,6 +380,33 @@ private: TensorShape _shape; }; +/** Scale Layer */ +class ScaleLayer final : public ILayer +{ +public: + /** Construct a scale layer. 
+ * + * @param[in] mul_w Accessor to get mul weight from. + * @param[in] add_w Accessor to get add weight from. + */ + ScaleLayer(ITensorAccessorUPtr mul_w, + ITensorAccessorUPtr add_w) + : _mul_w(std::move(mul_w)), _add_w(std::move(add_w)) + { + } + + NodeID create_layer(IStream &s) override + { + NodeParams common_params = { name(), s.hints().target_hint }; + NodeIdxPair input = { s.tail_node(), 0 }; + return GraphBuilder::add_scale_layer(s.graph(), common_params, input, std::move(_mul_w), std::move(_add_w)); + } + +private: + ITensorAccessorUPtr _mul_w; + ITensorAccessorUPtr _add_w; +}; + /** Softmax Layer */ class SoftmaxLayer final : public ILayer { diff --git a/arm_compute/graph/nodes/EltwiseLayerNode.h b/arm_compute/graph/nodes/EltwiseLayerNode.h index 5b9fa84bbb..09cbc75b80 100644 --- a/arm_compute/graph/nodes/EltwiseLayerNode.h +++ b/arm_compute/graph/nodes/EltwiseLayerNode.h @@ -36,15 +36,29 @@ class EltwiseLayerNode final : public INode public: /** Constructor * - * @param[in] op Element-wise operation to perform + * @param[in] op Element-wise operation to perform + * @param[in] c_policy (Optional) Convert policy used for the operation + * @param[in] r_policy (Optional) Rounding policy used for the operation */ - EltwiseLayerNode(EltwiseOperation op); + EltwiseLayerNode(EltwiseOperation op, ConvertPolicy c_policy = ConvertPolicy::SATURATE, RoundingPolicy r_policy = RoundingPolicy::TO_ZERO); /** Eltwise operation accessor * * @return Eltwise operation that is to be performed by the node */ EltwiseOperation eltwise_operation() const; + /** Convert policy accessor + * + * @return Convert policy that is used in the node + */ + ConvertPolicy convert_policy() const; + + /** Rounding policy accessor + * + * @return Rounding policy that is used in the node + */ + RoundingPolicy rounding_policy() const; + // Inherited overridden methods: NodeType type() const override; bool forward_descriptors() override; @@ -53,6 +67,8 @@ public: private: EltwiseOperation _op; + 
ConvertPolicy _convert_policy; + RoundingPolicy _rounding_policy; }; } // namespace graph } // namespace arm_compute diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index ffcd8b8411..9e6d91962e 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -195,7 +195,7 @@ private: /** Main program for AlexNet * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp index a47fc9d588..2dba67f5eb 100644 --- a/examples/graph_googlenet.cpp +++ b/examples/graph_googlenet.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph_utils; /** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphGooglenetExample : public Example { @@ -215,7 +215,7 @@ private: /** Main program for Googlenet * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights 
folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index c92e69e6a7..d1d6ab4e05 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph_utils; /** Example demonstrating how to implement InceptionV3's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class InceptionV3Example : public Example { diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 92be2d48c1..32c75827d3 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -136,7 +136,7 @@ private: /** Main program for LeNet * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { 
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 7bfc6808fa..50dc02482f 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -232,7 +232,7 @@ private: /** Main program for MobileNetV1 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), * [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160), * [optional] Path to the weights folder, * [optional] image, diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index 7332bb8b15..bafa9a5852 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -32,10 +32,10 @@ using namespace arm_compute::utils; using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; -/** Example demonstrating how to implement Microsoft's ResNet50 network using the Compute Library's graph API +/** Example demonstrating how to implement ResNet50 network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphResNet50Example : public Example { @@ -252,7 +252,7 @@ private: /** Main program for ResNet50 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + 
* @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp new file mode 100644 index 0000000000..f96a02e6d6 --- /dev/null +++ b/examples/graph_resnext50.cpp @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/graph.h" +#include "support/ToolchainSupport.h" +#include "utils/GraphUtils.h" +#include "utils/Utils.h" + +#include + +using namespace arm_compute::utils; +using namespace arm_compute::graph::frontend; +using namespace arm_compute::graph_utils; + +/** Example demonstrating how to implement ResNeXt50 network using the Compute Library's graph API + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + */ +class GraphResNeXt50Example : public Example +{ +public: + void do_setup(int argc, char **argv) override + { + std::string data_path; /* Path to the trainable data */ + std::string npy_in; /* Input npy data */ + std::string npy_out; /* Output npy data */ + + // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON + const int target = argc > 1 ? 
std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; + + // Parse arguments + if(argc < 2) + { + // Print help + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; + } + else if(argc == 2) + { + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; + } + else if(argc == 3) + { + data_path = argv[2]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No input npy file provided: using random values\n\n"; + } + else if(argc == 4) + { + data_path = argv[2]; + npy_in = argv[3]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [npy_out] [fast_math_hint]\n\n"; + std::cout << "No output npy file provided: skipping output accessor\n\n"; + } + else if(argc == 5) + { + data_path = argv[2]; + npy_in = argv[3]; + npy_out = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + npy_in = argv[3]; + npy_out = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ? 
FastMathHint::DISABLED : FastMathHint::ENABLED; + } + + graph << target_hint + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(npy_in)) + << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy")) + .set_name("bn_data/Scale") + << ConvolutionLayer( + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), + PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) + .set_name("conv0/Convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0"); + + add_residual_block(data_path, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); + add_residual_block(data_path, 512, 2, 4, 2); + add_residual_block(data_path, 1024, 3, 6, 2); + add_residual_block(data_path, 2048, 4, 3, 2); + + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool1") + << FlattenLayer().set_name("predictions/Reshape") + << OutputLayer(get_npy_output_accessor(npy_out, TensorShape(2048U), DataType::F32)); + + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); + } + + void do_run() override + { + // Run graph + graph.run(); + } + +private: + Stream graph{ 0, "ResNeXt50" }; + + void add_residual_block(const std::string &data_path, unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) + { + for(unsigned int i = 0; i < num_units; ++i) + { + std::stringstream unit_path_ss; + unit_path_ss << "/cnn_data/resnext50_model/stage" << stage << "_unit" << (i + 1) << "_"; + 
std::string unit_path = unit_path_ss.str(); + + std::stringstream unit_name_ss; + unit_name_ss << "stage" << stage << "/unit" << (i + 1) << "/"; + std::string unit_name = unit_name_ss.str(); + + PadStrideInfo pad_grouped_conv(1, 1, 1, 1); + if(i == 0) + { + pad_grouped_conv = (stage == 1) ? PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, DimensionRoundingType::FLOOR); + } + + SubStream right(graph); + right << ConvolutionLayer( + 1U, 1U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer( + 3U, 3U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy"), + std::unique_ptr(nullptr), + pad_grouped_conv, 32) + .set_name(unit_name + "conv2/convolution") + << ScaleLayer(get_weights_accessor(data_path, unit_path + "bn2_mul.npy"), + get_weights_accessor(data_path, unit_path + "bn2_add.npy")) + .set_name(unit_name + "conv1/Scale") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu") + + << ConvolutionLayer( + 1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); + + SubStream left(graph); + if(i == 0) + { + left << ConvolutionLayer( + 1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "sc_weights.npy"), + std::unique_ptr(nullptr), + PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) + .set_name(unit_name + "sc/convolution") + << ScaleLayer(get_weights_accessor(data_path, unit_path + 
"sc_bn_mul.npy"), + get_weights_accessor(data_path, unit_path + "sc_bn_add.npy")) + .set_name(unit_name + "sc/scale"); + } + + graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + } + } +}; + +/** Main program for ResNeXt50 + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out ) + */ +int main(int argc, char **argv) +{ + return arm_compute::utils::run_example(argc, argv); +} diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index 4d7bcf1ca8..b632688839 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -37,7 +37,7 @@ using namespace arm_compute::logging; /** Example demonstrating how to implement Squeezenet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphSqueezenetExample : public Example { @@ -218,7 +218,7 @@ private: /** Main program for Squeezenet v1.0 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] 
Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index f5fede2f70..9e3466b993 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -40,7 +40,7 @@ namespace /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphSqueezenet_v1_1Example : public Example { @@ -223,7 +223,7 @@ private: /** Main program for Squeezenet v1.1 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index 6db4e386de..72e724025b 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -35,7 +35,7 @@ using namespace arm_compute::graph_utils; /** Example 
demonstrating how to implement VGG16's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphVGG16Example : public Example { @@ -257,7 +257,7 @@ private: /** Main program for VGG16 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index 5a281ea86a..b15c3f2def 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -35,7 +35,7 @@ using namespace arm_compute::graph_utils; /** Example demonstrating how to implement VGG19's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, 
[optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphVGG19Example : public Example { @@ -270,7 +270,7 @@ private: /** Main program for VGG19 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index df94d0b169..4c5d30a33f 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -399,6 +399,35 @@ NodeID GraphBuilder::add_reshape_node(Graph &g, NodeParams params, NodeIdxPair i return create_simple_single_input_output_node(g, params, input, shape); } +NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams ¶ms, NodeIdxPair input, ITensorAccessorUPtr mul_accessor, ITensorAccessorUPtr add_accessor) +{ + CHECK_NODEIDX_PAIR(input, g); + + // Get input tensor descriptor + const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); + + // Create mul node + TensorDescriptor mul_desc = input_tensor_desc; + const size_t C = input_tensor_desc.shape[get_dimension_idx(mul_desc, DataLayoutDimension::CHANNEL)]; + mul_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::WIDTH), 1); + mul_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::HEIGHT), 1); + mul_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL), C); + NodeID mul_const_nid = add_const_node_with_name(g, params, "Mul", mul_desc, std::move(mul_accessor)); + NodeIdxPair mul_const_nidxp = { 
mul_const_nid, 0 }; + + // Create add node + TensorDescriptor add_desc = mul_desc; + NodeID add_const_nid = add_const_node_with_name(g, params, "Add", add_desc, std::move(add_accessor)); + NodeIdxPair add_const_nidxp = { add_const_nid, 0 }; + + // Create node and connect + NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::MUL); + NodeIdxPair mulnode_nidxp = { mul_node, 0 }; + NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::ADD); + + return add_node; +} + NodeID GraphBuilder::add_softmax_node(Graph &g, NodeParams params, NodeIdxPair input, float beta) { return create_simple_single_input_output_node(g, params, input, beta); diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 4626cb5781..ac04f1063c 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -313,10 +313,11 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ICLTensor *input1 = get_backing_tensor(node.input(0)); - ICLTensor *input2 = get_backing_tensor(node.input(1)); - ICLTensor *output = get_backing_tensor(node.output(0)); - const EltwiseOperation eltwise_op = node.eltwise_operation(); + ICLTensor *input1 = get_backing_tensor(node.input(0)); + ICLTensor *input2 = get_backing_tensor(node.input(1)); + ICLTensor *output = get_backing_tensor(node.output(0)); + const EltwiseOperation eltwise_op = node.eltwise_operation(); + const ConvertPolicy convert_policy = node.convert_policy(); ARM_COMPUTE_ERROR_ON(input1 == nullptr); ARM_COMPUTE_ERROR_ON(input2 == nullptr); ARM_COMPUTE_ERROR_ON(output == nullptr); @@ -327,18 +328,18 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) { std::tie(func, func_name) = create_named_function(std::string("CLArithmeticAddition"), input1, input2, 
output, - ConvertPolicy::SATURATE); + convert_policy); } else if(eltwise_op == EltwiseOperation::SUB) { std::tie(func, func_name) = create_named_function( - std::string("CLArithmeticSubtraction"), input1, input2, output, ConvertPolicy::SATURATE); + std::string("CLArithmeticSubtraction"), input1, input2, output, convert_policy); } else if(eltwise_op == EltwiseOperation::MUL) { std::tie(func, func_name) = create_named_function( - std::string("CLPixelWiseMultiplication"), input1, input2, output, 1.f, ConvertPolicy::SATURATE, - RoundingPolicy::TO_NEAREST_EVEN); + std::string("CLPixelWiseMultiplication"), input1, input2, output, 1.f, convert_policy, + node.rounding_policy()); } else { diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index d3c5737e68..d53daf1109 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -301,10 +301,11 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - IGCTensor *input1 = get_backing_tensor(node.input(0)); - IGCTensor *input2 = get_backing_tensor(node.input(1)); - IGCTensor *output = get_backing_tensor(node.output(0)); - const EltwiseOperation eltwise_op = node.eltwise_operation(); + IGCTensor *input1 = get_backing_tensor(node.input(0)); + IGCTensor *input2 = get_backing_tensor(node.input(1)); + IGCTensor *output = get_backing_tensor(node.output(0)); + const EltwiseOperation eltwise_op = node.eltwise_operation(); + const ConvertPolicy convert_policy = node.convert_policy(); ARM_COMPUTE_ERROR_ON(input1 == nullptr); ARM_COMPUTE_ERROR_ON(input2 == nullptr); ARM_COMPUTE_ERROR_ON(output == nullptr); @@ -315,7 +316,7 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) { std::tie(func, func_name) = create_named_function(std::string("GCArithmeticAddition"), input1, input2, output, - ConvertPolicy::SATURATE); + convert_policy); } else 
if(eltwise_op == EltwiseOperation::SUB) { diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 7a37dfa39d..7f97876e57 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -294,10 +294,11 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); // Extract IO and info - ITensor *input1 = get_backing_tensor(node.input(0)); - ITensor *input2 = get_backing_tensor(node.input(1)); - ITensor *output = get_backing_tensor(node.output(0)); - const EltwiseOperation eltwise_op = node.eltwise_operation(); + ITensor *input1 = get_backing_tensor(node.input(0)); + ITensor *input2 = get_backing_tensor(node.input(1)); + ITensor *output = get_backing_tensor(node.output(0)); + const EltwiseOperation eltwise_op = node.eltwise_operation(); + const ConvertPolicy convert_policy = node.convert_policy(); ARM_COMPUTE_ERROR_ON(input1 == nullptr); ARM_COMPUTE_ERROR_ON(input2 == nullptr); ARM_COMPUTE_ERROR_ON(output == nullptr); @@ -307,18 +308,18 @@ std::unique_ptr create_eltwise_layer(EltwiseLayerNode &node) if(eltwise_op == EltwiseOperation::ADD) { std::tie(func, func_name) = create_named_function(std::string("NEArithmeticAddition"), - input1, input2, output, ConvertPolicy::SATURATE); + input1, input2, output, convert_policy); } else if(eltwise_op == EltwiseOperation::SUB) { std::tie(func, func_name) = create_named_function(std::string("NEArithmeticSubtraction"), - input1, input2, output, ConvertPolicy::SATURATE); + input1, input2, output, convert_policy); } else if(eltwise_op == EltwiseOperation::MUL) { std::tie(func, func_name) = create_named_function(std::string("NEPixelWiseMultiplication"), input1, input2, output, 1.f, - ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN); + convert_policy, node.rounding_policy()); } else { diff --git a/src/graph/nodes/EltwiseLayerNode.cpp b/src/graph/nodes/EltwiseLayerNode.cpp 
index 6f1e0eecd9..568b882425 100644 --- a/src/graph/nodes/EltwiseLayerNode.cpp +++ b/src/graph/nodes/EltwiseLayerNode.cpp @@ -30,8 +30,8 @@ namespace arm_compute { namespace graph { -EltwiseLayerNode::EltwiseLayerNode(EltwiseOperation op) - : _op(op) +EltwiseLayerNode::EltwiseLayerNode(EltwiseOperation op, ConvertPolicy c_policy, RoundingPolicy r_policy) + : _op(op), _convert_policy(c_policy), _rounding_policy(r_policy) { _input_edges.resize(2, EmptyEdgeID); _outputs.resize(1, NullTensorID); @@ -42,6 +42,16 @@ EltwiseOperation EltwiseLayerNode::eltwise_operation() const return _op; } +ConvertPolicy EltwiseLayerNode::convert_policy() const +{ + return _convert_policy; +} + +RoundingPolicy EltwiseLayerNode::rounding_policy() const +{ + return _rounding_policy; +} + bool EltwiseLayerNode::forward_descriptors() { if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID)) @@ -56,8 +66,7 @@ bool EltwiseLayerNode::forward_descriptors() TensorDescriptor EltwiseLayerNode::configure_output(size_t idx) const { - ARM_COMPUTE_UNUSED(idx); - ARM_COMPUTE_UNUSED(_op); + ARM_COMPUTE_UNUSED(idx, _op, _convert_policy, _rounding_policy); const Tensor *src = input(0); ARM_COMPUTE_ERROR_ON(src == nullptr); diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp index 145e44950b..0edb6f2a56 100644 --- a/utils/GraphUtils.cpp +++ b/utils/GraphUtils.cpp @@ -129,6 +129,45 @@ bool DummyAccessor::access_tensor(ITensor &tensor) return ret; } +NumPyAccessor::NumPyAccessor(std::string npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream) + : _npy_tensor(), _filename(std::move(npy_path)), _output_stream(output_stream) +{ + NumPyBinLoader loader(_filename); + + TensorInfo info(shape, 1, data_type); + _npy_tensor.allocator()->init(info); + _npy_tensor.allocator()->allocate(); + + loader.access_tensor(_npy_tensor); +} + +template +void NumPyAccessor::access_numpy_tensor(ITensor &tensor) +{ + const int num_elements = tensor.info()->total_size(); + int num_mismatches = 
utils::compare_tensor(tensor, _npy_tensor); + float percentage_mismatches = static_cast(num_mismatches) / num_elements; + + _output_stream << "Results: " << 100.f - (percentage_mismatches * 100) << " % matches with the provided output[" << _filename << "]." << std::endl; +} + +bool NumPyAccessor::access_tensor(ITensor &tensor) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON(_npy_tensor.info()->dimension(0) != tensor.info()->dimension(0)); + + switch(tensor.info()->data_type()) + { + case DataType::F32: + access_numpy_tensor(tensor); + break; + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + + return false; +} + PPMAccessor::PPMAccessor(std::string ppm_path, bool bgr, std::unique_ptr preprocessor) : _ppm_path(std::move(ppm_path)), _bgr(bgr), _preprocessor(std::move(preprocessor)) { diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h index a8507b1ac7..597708369d 100644 --- a/utils/GraphUtils.h +++ b/utils/GraphUtils.h @@ -25,9 +25,11 @@ #define __ARM_COMPUTE_GRAPH_UTILS_H__ #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/utils/misc/Utility.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/ITensorAccessor.h" #include "arm_compute/graph/Types.h" +#include "arm_compute/runtime/Tensor.h" #include #include @@ -117,6 +119,37 @@ private: unsigned int _maximum; }; +/** NumPy accessor class */ +class NumPyAccessor final : public graph::ITensorAccessor +{ +public: + /** Constructor + * + * @param[in] npy_path Path to npy file. + * @param[in] shape Shape of the numpy tensor data. + * @param[in] data_type DataType of the numpy tensor data. 
+ * @param[out] output_stream (Optional) Output stream + */ + NumPyAccessor(std::string npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream = std::cout); + /** Allow instances of this class to be move constructed */ + NumPyAccessor(NumPyAccessor &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NumPyAccessor(const NumPyAccessor &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NumPyAccessor &operator=(const NumPyAccessor &) = delete; + + // Inherited methods overriden: + bool access_tensor(ITensor &tensor) override; + +private: + template + void access_numpy_tensor(ITensor &tensor); + + Tensor _npy_tensor; + const std::string _filename; + std::ostream &_output_stream; +}; + /** PPM accessor class */ class PPMAccessor final : public graph::ITensorAccessor { @@ -273,7 +306,14 @@ inline std::unique_ptr get_input_accessor(const std::str } else { - return arm_compute::support::cpp14::make_unique(ppm_path, bgr, std::move(preprocessor)); + if(arm_compute::utility::endswith(ppm_path, ".npy")) + { + return arm_compute::support::cpp14::make_unique(ppm_path); + } + else + { + return arm_compute::support::cpp14::make_unique(ppm_path, bgr, std::move(preprocessor)); + } } } @@ -298,6 +338,28 @@ inline std::unique_ptr get_output_accessor(const std::st return arm_compute::support::cpp14::make_unique(labels_path, top_n, output_stream); } } +/** Generates appropriate npy output accessor according to the specified npy_path + * + * @note If npy_path is empty will generate a DummyAccessor else will generate a NpyAccessor + * + * @param[in] npy_path Path to npy file. + * @param[in] shape Shape of the numpy tensor data. + * @param[in] data_type DataType of the numpy tensor data. 
+ * @param[out] output_stream (Optional) Output stream + * + * @return An appropriate tensor accessor + */ +inline std::unique_ptr get_npy_output_accessor(const std::string &npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream = std::cout) +{ + if(npy_path.empty()) + { + return arm_compute::support::cpp14::make_unique(0); + } + else + { + return arm_compute::support::cpp14::make_unique(npy_path, shape, data_type, output_stream); + } +} /** Utility function to return the TargetHint * diff --git a/utils/Utils.h b/utils/Utils.h index cadba3a088..6cb71fd3ba 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -924,6 +924,43 @@ void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt) * @return The free memory in kB */ uint64_t get_mem_free_from_meminfo(); + +/** Compare to tensor + * + * @param[in] tensor1 First tensor to be compared. + * @param[in] tensor2 Second tensor to be compared. + * + * @return The number of mismatches + */ +template +int compare_tensor(ITensor &tensor1, ITensor &tensor2) +{ + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&tensor1, &tensor2); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(&tensor1, &tensor2); + + int num_mismatches = 0; + Window window; + window.use_tensor_dimensions(tensor1.info()->tensor_shape()); + + map(tensor1, true); + map(tensor2, true); + Iterator itensor1(&tensor1, window); + Iterator itensor2(&tensor2, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + if(std::abs(*reinterpret_cast(itensor1.ptr()) - *reinterpret_cast(itensor2.ptr())) > 0.00001) + { + ++num_mismatches; + } + }, + itensor1, itensor2); + + unmap(itensor1); + unmap(itensor2); + + return num_mismatches; +} } // namespace utils } // namespace arm_compute #endif /* __UTILS_UTILS_H__*/ -- cgit v1.2.1