author    Georgios Pinitas <georgios.pinitas@arm.com>    2018-01-02 13:27:37 +0000
committer Anthony Barbier <anthony.barbier@arm.com>      2018-11-02 16:42:33 +0000
commit    1250a5a259962514d31bb5f8148f1d0f0a82b946 (patch)
tree      a9c16daffa5228926715c805d73310b4b3c2e324 /src
parent    7c23ad01c028f73aef0b439fc5d5d14e92e5f4e2 (diff)
COMPMID-767 : Propagate hints to subgraph.
-Propagates hints to subgraph.
-Fixes dispatching of the appropriate optimized DepthwiseConvolution kernel for the OpenCL backend. The NEON backend is altered to default to the generic case until COMPMID-769 is addressed.

Change-Id: I544f05cd99a9ac253f1b19aa4e4bb222b8fdd087
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114781
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
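For illustration, below is a minimal, self-contained sketch of the hint-propagation idea described above. The types here (GraphHints, GraphContext, Graph and the hint enums) are simplified stand-ins for this sketch, not the actual arm_compute graph API; the real change is the one-line copy in SubGraph::construct shown in the diff further down.

// Sketch: before this change only the TargetHint crossed the sub-graph
// boundary, so any other hint (e.g. the convolution method) fell back to
// its default inside the sub-graph. After the change the whole hint set
// from the parent GraphContext is copied into the sub-graph's Graph.
#include <iostream>
#include <memory>

enum class TargetHint { DONT_CARE, OPENCL, NEON };
enum class ConvolutionMethodHint { GEMM, DIRECT };

// Stand-in for the set of hints a GraphContext carries.
struct GraphHints
{
    TargetHint            target_hint{ TargetHint::DONT_CARE };
    ConvolutionMethodHint convolution_hint{ ConvolutionMethodHint::GEMM };
};

struct GraphContext
{
    GraphHints       &hints() { return _hints; }
    const GraphHints &hints() const { return _hints; }
    GraphHints        _hints{};
};

struct Graph
{
    GraphHints &hints() { return _hints; }
    GraphHints  _hints{};
};

// Old behaviour: only the target hint is forwarded to the new sub-graph.
std::unique_ptr<Graph> construct_old(TargetHint hint)
{
    auto graph = std::make_unique<Graph>();
    graph->hints().target_hint = hint; // other hints stay at their defaults
    return graph;
}

// New behaviour: the full hint set of the parent context is propagated.
std::unique_ptr<Graph> construct_new(const GraphContext &ctx)
{
    auto graph = std::make_unique<Graph>();
    graph->hints() = ctx.hints(); // copy every hint, not just the target
    return graph;
}

int main()
{
    GraphContext ctx;
    ctx.hints().target_hint      = TargetHint::OPENCL;
    ctx.hints().convolution_hint = ConvolutionMethodHint::DIRECT;

    auto old_graph = construct_old(ctx.hints().target_hint);
    auto new_graph = construct_new(ctx);

    // The old path drops the DIRECT convolution hint (prints 0); the new path keeps it (prints 1).
    std::cout << (old_graph->hints().convolution_hint == ConvolutionMethodHint::DIRECT) << '\n';
    std::cout << (new_graph->hints().convolution_hint == ConvolutionMethodHint::DIRECT) << '\n';
}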
Diffstat (limited to 'src')
-rw-r--r--  src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp   | 27
-rw-r--r--  src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | 26
-rw-r--r--  src/graph/SubGraph.cpp                                          |  4
-rw-r--r--  src/graph/nodes/BranchLayer.cpp                                 |  2
-rw-r--r--  src/graph/operations/CLSimpleOperations.cpp                     |  4
-rw-r--r--  src/graph/operations/NESimpleOperations.cpp                     | 18
-rw-r--r--  src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp     |  4
7 files changed, 31 insertions, 54 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index ddc3a2dd25..f9229ba294 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -33,32 +33,11 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
using namespace arm_compute;
-
-namespace
-{
-/** Calculates expected output shape dimension
- *
- * @param[in] Input shape
- *
- * @return Expected output shape
- */
-TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info)
-{
- unsigned int output_width = 0;
- unsigned int output_height = 0;
-
- std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info);
-
- TensorShape output_shape = input_shape;
- output_shape.set(0, output_width);
- output_shape.set(1, output_height);
-
- return output_shape;
-}
-} // namespace
+using namespace arm_compute::misc::shape_calculator;
CLDepthwiseConvolutionLayer3x3Kernel::CLDepthwiseConvolutionLayer3x3Kernel()
: _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0)
@@ -91,7 +70,7 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
}
// Get convolved dimensions
- TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info);
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info);
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*output->info(),
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index dd5c44801e..3cdb39ef94 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -36,9 +36,11 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
using namespace arm_compute;
using namespace arm_compute::detail;
+using namespace arm_compute::misc::shape_calculator;
NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel()
: _border_size(0), _input(), _output(), _weights(), _conv_info()
@@ -53,15 +55,21 @@ BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const
void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
- std::pair<unsigned int, unsigned int> expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(),
- weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(),
- conv_info);
+ // Get convolved dimensions
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info);
- ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x());
- ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y());
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(),
+ output_shape,
+ 1,
+ input->info()->data_type(),
+ input->info()->fixed_point_position(),
+ input->info()->quantization_info());
+
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
_input = input;
_output = output;
@@ -80,12 +88,12 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration));
- const unsigned int num_x_steps = (expected_output.first + num_elems_written_per_iteration - 1) / num_elems_written_per_iteration;
+ const unsigned int num_x_steps = (output_shape.x() + num_elems_written_per_iteration - 1) / num_elems_written_per_iteration;
const int input_num_elems_processed = get_input_num_elems_processed(num_elems_written_per_iteration, conv_stride_x);
- AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (expected_output.second - 1) + 2);
+ AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (output_shape.y() - 1) + 2);
AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1));
- AccessWindowStatic output_access(output->info(), 0, 0, num_x_steps * num_elems_written_per_iteration, expected_output.second);
+ AccessWindowStatic output_access(output->info(), 0, 0, num_x_steps * num_elems_written_per_iteration, output_shape.y());
update_window_and_padding(win, input_access, weights_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp
index 977cd4a4ae..f62b2617c5 100644
--- a/src/graph/SubGraph.cpp
+++ b/src/graph/SubGraph.cpp
@@ -52,13 +52,13 @@ void SubGraph::add_tensor_object(std::unique_ptr<ITensorObject> tensor)
}
}
-std::unique_ptr<Graph> SubGraph::construct(TargetHint hint, std::unique_ptr<ITensorObject> input, std::unique_ptr<ITensorObject> output)
+std::unique_ptr<Graph> SubGraph::construct(const GraphContext &ctx, std::unique_ptr<ITensorObject> input, std::unique_ptr<ITensorObject> output)
{
auto graph = arm_compute::support::cpp14::make_unique<Graph>();
// Set hint
// TODO(geopin01): store hints of sub-graph
- graph->hints().set_target_hint(hint);
+ graph->hints() = ctx.hints();
// Configure input
if(_input == nullptr)
diff --git a/src/graph/nodes/BranchLayer.cpp b/src/graph/nodes/BranchLayer.cpp
index d062e4b791..eea0540741 100644
--- a/src/graph/nodes/BranchLayer.cpp
+++ b/src/graph/nodes/BranchLayer.cpp
@@ -151,7 +151,7 @@ std::unique_ptr<arm_compute::IFunction> BranchLayer::instantiate_node(GraphConte
}
// Construct sub_graph
- auto g = sg->construct(ctx.hints().target_hint(), std::move(in), std::move(out));
+ auto g = sg->construct(ctx, std::move(in), std::move(out));
// Register graph to function
func->register_graph(std::move(g));
diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp
index 8f2bf23ce3..61315e73b2 100644
--- a/src/graph/operations/CLSimpleOperations.cpp
+++ b/src/graph/operations/CLSimpleOperations.cpp
@@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(CLDepthwiseConvolutionOperation, OPENCL, OperationType
bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
if(run_3x3_opt)
{
- auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+ auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
depwthwise_conv->configure(in, weights, biases, out, conv_info);
func = std::move(depwthwise_conv);
}
else
{
- auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+ auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
depwthwise_conv->configure(in, weights, biases, out, conv_info);
func = std::move(depwthwise_conv);
}
diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp
index bb99e8da4b..88bf3ec0a0 100644
--- a/src/graph/operations/NESimpleOperations.cpp
+++ b/src/graph/operations/NESimpleOperations.cpp
@@ -149,23 +149,13 @@ REGISTER_SIMPLE_OPERATION(NEDepthwiseConvolutionOperation, NEON, OperationType::
auto *biases = ctx.num_inputs() == 3 ? dynamic_cast<arm_compute::ITensor *>(ctx.input(2)) : nullptr;
auto *out = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
const auto conv_info = ctx.parameter<PadStrideInfo>("ConvolutionInfo");
- const auto opt3x3 = ctx.parameter<bool>("Optimized3x3");
// Create and configure function
std::unique_ptr<arm_compute::IFunction> func;
- bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
- if(run_3x3_opt)
- {
- auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
- depwthwise_conv->configure(in, weights, biases, out, conv_info);
- func = std::move(depwthwise_conv);
- }
- else
- {
- auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
- depwthwise_conv->configure(in, weights, biases, out, conv_info);
- func = std::move(depwthwise_conv);
- }
+ // TODO (COMPMID-769): Add support for asymmetric padding in NEDepthwiseConvolutionLayer3x3 to enable opt3x3 support
+ auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
+ depwthwise_conv->configure(in, weights, biases, out, conv_info);
+ func = std::move(depwthwise_conv);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEDepthwiseConvolutionLayer"
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index b890c6f5d5..4575c7af9d 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -39,9 +39,9 @@ NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- // Call convolution kernel
+ // Configure kernels
_kernel.configure(input, weights, output, conv_info);
_border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
if(biases != nullptr)