From 1250a5a259962514d31bb5f8148f1d0f0a82b946 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 2 Jan 2018 13:27:37 +0000 Subject: COMPMID-767 : Propagate hints to subgraph. -Propagates hints to subgraph. -Fixes dispatching of appropriate optimized DepthwiseConvolution kernel for OpenCL backend. NEON backend is altered to default to the generic case until COMPMID-769 is addressed. Change-Id: I544f05cd99a9ac253f1b19aa4e4bb222b8fdd087 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114781 Reviewed-by: Pablo Tello Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/utils/misc/ShapeCalculator.h | 16 +++++++++++++ arm_compute/graph/SubGraph.h | 4 ++-- examples/graph_mobilenet.cpp | 4 +++- .../CLDepthwiseConvolutionLayer3x3Kernel.cpp | 27 +++------------------- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 26 +++++++++++++-------- src/graph/SubGraph.cpp | 4 ++-- src/graph/nodes/BranchLayer.cpp | 2 +- src/graph/operations/CLSimpleOperations.cpp | 4 ++-- src/graph/operations/NESimpleOperations.cpp | 18 ++++----------- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 4 ++-- 10 files changed, 52 insertions(+), 57 deletions(-) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 52773faa3a..f31eb3d336 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__ #include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Utils.h" namespace arm_compute { @@ -98,6 +99,21 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input) return shape_transposed; } +inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +{ + const TensorShape input_shape{ input.tensor_shape() }; + const TensorShape weights_shape{ weights.tensor_shape() }; + + unsigned int output_width = 0; + unsigned 
int output_height = 0; + std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); + + TensorShape output_shape{ input_shape }; + output_shape.set(0, output_width); + output_shape.set(1, output_height); + + return output_shape; +} } // namespace shape_calculator } // namespace misc } // namespace arm_compute diff --git a/arm_compute/graph/SubGraph.h b/arm_compute/graph/SubGraph.h index d768bf9119..e3217e7095 100644 --- a/arm_compute/graph/SubGraph.h +++ b/arm_compute/graph/SubGraph.h @@ -56,13 +56,13 @@ public: void add_tensor_object(std::unique_ptr tensor); /** Constructs a graph from a subgraph * - * @param[in] hint Execution target hint + * @param[in] ctx Parent graph context * @param[in] input Input to the graph * @param[in] output Output to the graph * * @return A graph */ - std::unique_ptr construct(TargetHint hint, std::unique_ptr input, std::unique_ptr output); + std::unique_ptr construct(const GraphContext &ctx, std::unique_ptr input, std::unique_ptr output); /** Checks if the subgraph has an input * * @return True if the sub-graph has an input else false diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 0c916c7ba0..553253383f 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -86,7 +86,8 @@ void main_graph_mobilenet(int argc, const char **argv) constexpr float mean_b = 104.01f; /* Mean value to subtract from blue channel */ // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON - TargetHint target_hint = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0); + TargetHint target_hint = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0); + ConvolutionMethodHint convolution_hint = target_hint == TargetHint::NEON ? 
ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT; // Parse arguments if(argc < 2) @@ -125,6 +126,7 @@ void main_graph_mobilenet(int argc, const char **argv) graph << target_hint << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), get_input_accessor(image, mean_r, mean_g, mean_b)) + << convolution_hint << ConvolutionLayer( 3U, 3U, 32U, get_weights_accessor(data_path, "/cnn_data/mobilenet_v1_model/Conv2d_0_weights.npy"), diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp index ddc3a2dd25..f9229ba294 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -33,32 +33,11 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" using namespace arm_compute; - -namespace -{ -/** Calculates expected output shape dimension - * - * @param[in] Input shape - * - * @return Expected output shape - */ -TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) -{ - unsigned int output_width = 0; - unsigned int output_height = 0; - - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); - - TensorShape output_shape = input_shape; - output_shape.set(0, output_width); - output_shape.set(1, output_height); - - return output_shape; -} -} // namespace +using namespace arm_compute::misc::shape_calculator; CLDepthwiseConvolutionLayer3x3Kernel::CLDepthwiseConvolutionLayer3x3Kernel() : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0) @@ -91,7 +70,7 @@ void 
CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con } // Get convolved dimensions - TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index dd5c44801e..3cdb39ef94 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -36,9 +36,11 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" using namespace arm_compute; using namespace arm_compute::detail; +using namespace arm_compute::misc::shape_calculator; NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel() : _border_size(0), _input(), _output(), _weights(), _conv_info() @@ -53,15 +55,21 @@ BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - std::pair expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), - weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), - conv_info); + // Get convolved dimensions + const TensorShape 
output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); - ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x()); - ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y()); + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), + output_shape, + 1, + input->info()->data_type(), + input->info()->fixed_point_position(), + input->info()->quantization_info()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); _input = input; _output = output; @@ -80,12 +88,12 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const // Configure kernel window Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); - const unsigned int num_x_steps = (expected_output.first + num_elems_written_per_iteration - 1) / num_elems_written_per_iteration; + const unsigned int num_x_steps = (output_shape.x() + num_elems_written_per_iteration - 1) / num_elems_written_per_iteration; const int input_num_elems_processed = get_input_num_elems_processed(num_elems_written_per_iteration, conv_stride_x); - AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (expected_output.second - 1) + 2); + AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (output_shape.y() - 1) + 2); AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); - AccessWindowStatic output_access(output->info(), 0, 0, num_x_steps * num_elems_written_per_iteration, expected_output.second); + AccessWindowStatic output_access(output->info(), 0, 0, num_x_steps * num_elems_written_per_iteration, output_shape.y()); update_window_and_padding(win, input_access, weights_access, 
output_access); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp index 977cd4a4ae..f62b2617c5 100644 --- a/src/graph/SubGraph.cpp +++ b/src/graph/SubGraph.cpp @@ -52,13 +52,13 @@ void SubGraph::add_tensor_object(std::unique_ptr tensor) } } -std::unique_ptr SubGraph::construct(TargetHint hint, std::unique_ptr input, std::unique_ptr output) +std::unique_ptr SubGraph::construct(const GraphContext &ctx, std::unique_ptr input, std::unique_ptr output) { auto graph = arm_compute::support::cpp14::make_unique(); // Set hint // TODO(geopin01): store hints of sub-graph - graph->hints().set_target_hint(hint); + graph->hints() = ctx.hints(); // Configure input if(_input == nullptr) diff --git a/src/graph/nodes/BranchLayer.cpp b/src/graph/nodes/BranchLayer.cpp index d062e4b791..eea0540741 100644 --- a/src/graph/nodes/BranchLayer.cpp +++ b/src/graph/nodes/BranchLayer.cpp @@ -151,7 +151,7 @@ std::unique_ptr BranchLayer::instantiate_node(GraphConte } // Construct sub_graph - auto g = sg->construct(ctx.hints().target_hint(), std::move(in), std::move(out)); + auto g = sg->construct(ctx, std::move(in), std::move(out)); // Register graph to function func->register_graph(std::move(g)); diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp index 8f2bf23ce3..61315e73b2 100644 --- a/src/graph/operations/CLSimpleOperations.cpp +++ b/src/graph/operations/CLSimpleOperations.cpp @@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(CLDepthwiseConvolutionOperation, OPENCL, OperationType bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3; if(run_3x3_opt) { - auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); + auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); depwthwise_conv->configure(in, weights, biases, out, conv_info); func = std::move(depwthwise_conv); } else { - auto depwthwise_conv = 
arm_compute::support::cpp14::make_unique(); + auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); depwthwise_conv->configure(in, weights, biases, out, conv_info); func = std::move(depwthwise_conv); } diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp index bb99e8da4b..88bf3ec0a0 100644 --- a/src/graph/operations/NESimpleOperations.cpp +++ b/src/graph/operations/NESimpleOperations.cpp @@ -149,23 +149,13 @@ REGISTER_SIMPLE_OPERATION(NEDepthwiseConvolutionOperation, NEON, OperationType:: auto *biases = ctx.num_inputs() == 3 ? dynamic_cast(ctx.input(2)) : nullptr; auto *out = dynamic_cast(ctx.output(0)); const auto conv_info = ctx.parameter("ConvolutionInfo"); - const auto opt3x3 = ctx.parameter("Optimized3x3"); // Create and configure function std::unique_ptr func; - bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3; - if(run_3x3_opt) - { - auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); - depwthwise_conv->configure(in, weights, biases, out, conv_info); - func = std::move(depwthwise_conv); - } - else - { - auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); - depwthwise_conv->configure(in, weights, biases, out, conv_info); - func = std::move(depwthwise_conv); - } + // TODO (COMPMID-769): Add support for asymmetric padding in NEDepthwiseConvolutionLayer3x3 to enable opt3x3 support + auto depwthwise_conv = arm_compute::support::cpp14::make_unique(); + depwthwise_conv->configure(in, weights, biases, out, conv_info); + func = std::move(depwthwise_conv); // Log info ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEDepthwiseConvolutionLayer" diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index b890c6f5d5..4575c7af9d 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -39,9 +39,9 @@ 
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3() void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - // Call convolution kernel + // Configure kernels _kernel.configure(input, weights, output, conv_info); _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); if(biases != nullptr) -- cgit v1.2.1