author     Abe Mbise <abe.mbise@arm.com>              2018-05-31 16:48:41 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>  2018-11-02 16:54:54 +0000
commit     7784c837afd5844fb6dc4d166ff253d983abfd2d (patch)
tree       3bc770240de148d565aa828e8f3471c354ac3837
parent     b03f7c5c780fe2df23eb8c5c1b4b1d65bd7f0339 (diff)
download   ComputeLibrary-7784c837afd5844fb6dc4d166ff253d983abfd2d.tar.gz
COMPMID-1167: Validation for NEDepthwiseConvolutionLayer
Change-Id: I9689e1a0627dc015dd2ce98417e4c97bb55581bb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131327
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
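
The new validate() entry points take ITensorInfo descriptors rather than tensors, so a configuration can be checked before any memory is allocated. A minimal usage sketch (the shapes below are illustrative, not part of this patch):

    using namespace arm_compute;

    // Describe the tensors only; no buffers are allocated for validation.
    const TensorInfo input(TensorShape(27U, 13U, 8U), 1, DataType::F32);   // [W, H, IFM]
    const TensorInfo weights(TensorShape(3U, 3U, 8U), 1, DataType::F32);   // [kernel_x, kernel_y, IFM]
    const TensorInfo biases(TensorShape(8U), 1, DataType::F32);            // [IFM]
    const TensorInfo output(TensorShape(25U, 11U, 8U), 1, DataType::F32);  // [conv_w, conv_h, OFM]
    const PadStrideInfo conv_info(1, 1, 0, 0);                             // stride 1, no padding

    // Returns a Status instead of asserting, so callers can probe support up front.
    const Status status = NEDepthwiseConvolutionLayer::validate(&input, &weights, &biases, &output, conv_info);
    if(bool(status))
    {
        // Safe to call configure() and run() with real tensors of these shapes.
    }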
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h  |  21
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h               |  15
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h       |   8
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h       |  10
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h      |  10
-rw-r--r--  arm_compute/core/utils/misc/ShapeCalculator.h                         |  13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h      |  28
-rw-r--r--  src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp               |  20
-rw-r--r--  src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp        | 204
-rw-r--r--  src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp                     |  30
-rw-r--r--  src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp             |  38
-rw-r--r--  src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp             |  38
-rw-r--r--  src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp            |  64
-rw-r--r--  src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp            |  62
-rw-r--r--  tests/validation/NEON/DepthwiseConvolutionLayer.cpp                   | 177
15 files changed, 558 insertions(+), 180 deletions(-)
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
index bd9e7eb781..3ffafd858f 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -53,8 +53,10 @@ public:
NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
+ * @note Supported data layouts: NCHW and NHWC
+ *
* @param[in] input Source tensor. DataType supported: QASYMM8, F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] for NHWC data layout. Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
@@ -66,8 +68,8 @@ public:
* @param[in] input_shape Input shape
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] dt Data type of the input and weights
- * @param[in] data_layout (Optional) Data layout of the input and weights tensor
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] data_layout (Optional) Data layout of the input and weights tensor
*
* @return True if the optimized kernels can be executed else false
*/
@@ -75,6 +77,20 @@ public:
/** Generates the convolver object */
void generate_convolver();
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3Kernel
+ *
+ * @note Supported data layouts: NCHW and NHWC
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] for NHWC data layout. Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
BorderSize border_size() const override;
@@ -82,6 +98,7 @@ public:
private:
void configure_generic();
void configure_optimized();
+
void run_generic(const Window &window, const ThreadInfo &info);
void run_optimized(const Window &window, const ThreadInfo &info);
/** Creates an optimized backend convolver object
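
As the new documentation above notes, the expected weights shape depends on the data layout, which is the main thing the kernel-level validate() enforces up front. A hedged sketch of the two layouts, reusing the input, output and conv_info descriptors from the sketch near the top (shapes illustrative):

    // NCHW: weights laid out as [3, 3, IFM]
    const TensorInfo weights_nchw(TensorShape(3U, 3U, 8U), 1, DataType::F32);
    // NHWC: weights laid out as [IFM, 3, 3]
    const TensorInfo weights_nhwc(TensorShape(8U, 3U, 3U), 1, DataType::F32);

    // The kernel picks the generic or optimized path internally and checks the
    // corresponding weights dimensions (see validate_arguments() in the .cpp diff below).
    const Status s = NEDepthwiseConvolutionLayer3x3Kernel::validate(&input, &weights_nchw, &output, conv_info);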
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
index 9c11cfa425..0d61d3ea38 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
@@ -65,6 +65,21 @@ public:
*/
void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseIm2ColKernel
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QASYMM8/F16/F32
+ * @param[in] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
+ * while every dimension above 3 represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
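
The dimension checks in the new Im2Col validate() (see validate_arguments() in the .cpp diff further below) pin down the output shape: dimension 0 is the patch size, dimension 2 the multiplied channel count. A worked example under the same illustrative shapes as above:

    // 3x3 kernel with bias on a [27, 13, 8] F32 input, stride 1, no padding:
    //   dim 0: kernel_w * kernel_h + 1 (bias)  -> 3*3 + 1 = 10
    //   dim 1: conv_w * conv_h                 -> 25*11   = 275
    //   dim 2: IFM * depth_multiplier          -> 8*1     = 8
    const TensorInfo in(TensorShape(27U, 13U, 8U), 1, DataType::F32);
    const TensorInfo im2col_out(TensorShape(10U, 275U, 8U), 1, DataType::F32);
    const Status s = NEDepthwiseIm2ColKernel::validate(&in, &im2col_out, Size2D(3U, 3U),
                                                       PadStrideInfo(1, 1, 0, 0), true /* has_bias */);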
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
index 458cbd7812..00977a91b4 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h
@@ -62,6 +62,14 @@ public:
* @param[in] conv_h The converted tensor's height.
*/
void configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseVectorToTensorKernel
+ *
+ * @param[in] input The input vector to convert. Data type supported: QASYMM8/S32/F16/F32.
+ * @param[in] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input.
+ * @param[in] conv_w The converted tensor's width.
+ * @param[in] conv_h The converted tensor's height.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
index d00e8a46ed..b78684f993 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h
@@ -59,6 +59,16 @@ public:
*/
void configure(const ITensor *input, ITensor *output, const ITensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseWeightsReshapeKernel
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: QASYMM8/F16/F32.
+ * @param[in] output The output tensor. Data type supported: same as @p input.
+ * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
index a05d591850..7dddaca3a0 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
@@ -56,6 +56,16 @@ public:
*/
void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixVectorMultiplyKernel
+ *
+ * @param[in] input0 First input tensor. Data types supported: QASYMM8/F32
+ * @param[in] input1 Second input tensor. Data types supported: same as @p input0.
+ * @param[in] output Output tensor which stores the result of the matrix-vector multiplication. Data type supported: same as @p input0, S32 for QASYMM8 input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
BorderSize border_size() const override;
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 221387649f..9bf6b046b4 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -36,6 +36,19 @@ namespace misc
{
namespace shape_calculator
{
+inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
+{
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ TensorShape output_shape(input);
+ output_shape.set(idx_w, conv_w);
+ output_shape.set(idx_h, conv_h);
+ output_shape.set(idx_c, input.x() / (conv_w * conv_h));
+
+ return output_shape;
+}
inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
{
TensorShape output_shape = input.tensor_shape();
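
The new helper simply inverts the im2col flattening: width and height come from the convolved size, and the channel count is recovered from the vector length. For example (illustrative numbers):

    // A flat GEMV result of 25 * 11 * 8 = 2200 elements, reshaped back to a tensor:
    const TensorShape vec(2200U);
    const TensorShape out = compute_vector_to_tensor_output_shape(vec, 25U, 11U, DataLayout::NCHW);
    // out is [25, 11, 8]: channels = input.x() / (conv_w * conv_h) = 2200 / 275 = 8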
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index aa4cace7c2..1317fb740e 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -65,6 +65,20 @@ public:
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. This is a 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+
// Inherited methods overridden:
void run() override;
@@ -120,6 +134,20 @@ public:
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. This is a 3D tensor with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+
// Inherited methods overridden:
void run() override;
void prepare() override;
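
For the quantized generic path the bias type differs from the input type, as the documentation above calls out. A sketch matching the QASYMM8 case exercised in the new tests at the end of this patch:

    const TensorInfo input(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8);
    const TensorInfo weights(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8);
    const TensorInfo biases(TensorShape(24U), 1, DataType::S32);      // S32 biases for QASYMM8 input
    const TensorInfo output(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8);

    // depth_multiplier = 3: 8 input channels -> 24 output channels
    const Status s = NEDepthwiseConvolutionLayer::validate(&input, &weights, &biases, &output,
                                                           PadStrideInfo(1, 1, 1, 0), 3U);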
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index e124ee42f3..67b2cc9f55 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -31,26 +31,14 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
namespace
{
-TensorShape compute_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
-{
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- TensorShape output_shape(input);
- output_shape.set(idx_w, conv_w);
- output_shape.set(idx_h, conv_h);
- output_shape.set(idx_c, input.x() / (conv_w * conv_h));
-
- return output_shape;
-}
-
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
@@ -58,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, s
if(output->total_size() != 0)
{
- TensorShape output_shape = compute_output_shape(input->tensor_shape(), conv_w, conv_h, output->data_layout());
+ TensorShape output_shape = compute_vector_to_tensor_output_shape(input->tensor_shape(), conv_w, conv_h, output->data_layout());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -77,7 +65,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Output auto initialization if not yet initialized
- TensorShape output_shape = compute_output_shape(input->info()->tensor_shape(), conv_w, conv_h, output->info()->data_layout());
+ TensorShape output_shape = compute_vector_to_tensor_output_shape(input->info()->tensor_shape(), conv_w, conv_h, output->info()->data_layout());
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), conv_w, conv_h));
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 09728e2a8d..62dabc8d32 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -144,6 +144,112 @@ inline void convolve_3x3(const Window &window, unsigned int num_elems_written_pe
ARM_COMPUTE_ERROR("Not implemented");
}
}
+
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, bool is_optimized)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+
+ if(is_optimized)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) != 3 || weights->dimension(2) != 3);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
+ }
+
+ if(output->total_size() != 0)
+ {
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && (output->data_type() != DataType::S32));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_float(input->data_type()) && (output->data_type() != DataType::F32));
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, bool is_optimized,
+ IDepthwiseConvolution *convolver = nullptr)
+{
+ Window win;
+ bool window_changed = false;
+
+ if(is_optimized)
+ {
+ if(convolver != nullptr)
+ {
+ auto win_last = convolver->get_window();
+ win.set(Window::DimX, Window::Dimension(0, win_last, 1));
+
+ // Auto-configure output
+ bool same_padding = conv_info.has_padding();
+ TensorShape output_shape{ input->tensor_shape() };
+
+ output_shape.set(1, convolver->output_size(output_shape.y(), same_padding)); // Set width
+ output_shape.set(2, convolver->output_size(output_shape.z(), same_padding)); // Set height
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
+
+ // Configure window (optimised)
+ // Set padding in channels
+ const int num_channels = weights->dimension(0);
+ if((num_channels >= 128) && (num_channels % 16 == 0))
+ {
+ input->extend_padding(PaddingSize(0, 4, 0, 0));
+ weights->extend_padding(PaddingSize(0, 4, 0, 0));
+ output->extend_padding(PaddingSize(0, 4, 0, 0));
+ }
+ }
+ }
+ else
+ {
+ // Get convolved dimensions
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
+ const DataType output_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt));
+
+ // Configure kernel window (generic)
+ const unsigned int conv_stride_x = conv_info.stride().first;
+ const unsigned int conv_stride_y = conv_info.stride().second;
+ const unsigned int conv_pad_top = conv_info.pad_top();
+ const unsigned int conv_pad_left = conv_info.pad_left();
+
+ unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x;
+ unsigned int num_elems_read_per_iteration = 0;
+
+ switch(input->data_type())
+ {
+ case DataType::QASYMM8:
+ num_elems_read_per_iteration = 16;
+ break;
+ case DataType::F32:
+ num_elems_read_per_iteration = 12;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported.");
+ }
+
+ // Configure kernel window
+ win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
+
+ AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, num_elems_read_per_iteration, 3, conv_stride_x, conv_stride_y);
+ AccessWindowStatic weights_access(weights, 0, 0, 3, 3);
+ AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration);
+
+ window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+ }
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
} // namespace
NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel()
@@ -159,8 +265,7 @@ BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const
void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
DataLayout data_layout)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
_input = input;
_output = output;
@@ -177,6 +282,17 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const
(_run_optimized) ? configure_optimized() : configure_generic();
}
+Status NEDepthwiseConvolutionLayer3x3Kernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+
+ bool is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->tensor_shape(), conv_info, input->data_type(), depth_multiplier, input->data_layout());
+
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info, depth_multiplier, is_optimized));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, depth_multiplier, is_optimized).first);
+ return Status{};
+}
+
void NEDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
@@ -227,90 +343,26 @@ void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver()
void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
{
- ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(0) != 3 || _weights->info()->dimension(1) != 3);
-
- // Get convolved dimensions
- const TensorShape output_shape = compute_depthwise_convolution_shape(*_input->info(), *_weights->info(), _conv_info, _depth_multiplier);
- const DataType output_dt = (_input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : _input->info()->data_type();
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*_output->info(),
- _input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_type(output_dt));
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(_output->info()->tensor_shape(), output_shape);
-
- const unsigned int conv_stride_x = _conv_info.stride().first;
- const unsigned int conv_stride_y = _conv_info.stride().second;
- const unsigned int conv_pad_top = _conv_info.pad_top();
- const unsigned int conv_pad_right = _conv_info.pad_right();
- const unsigned int conv_pad_bottom = _conv_info.pad_bottom();
- const unsigned int conv_pad_left = _conv_info.pad_left();
-
- ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3);
-
- unsigned int num_elems_read_per_iteration = 0;
- switch(_input->info()->data_type())
- {
- case DataType::QASYMM8:
- num_elems_read_per_iteration = 16;
- _num_elems_written_per_iteration = 16 >> conv_stride_x;
- break;
- case DataType::F32:
- num_elems_read_per_iteration = 12;
- _num_elems_written_per_iteration = 16 >> conv_stride_x;
- break;
- default:
- ARM_COMPUTE_ERROR("Data type not supported.");
- }
- _border_size = BorderSize(conv_pad_top, conv_pad_right, conv_pad_bottom, conv_pad_left);
-
- // Configure kernel window
- Window win = calculate_max_window(*_output->info(), Steps(_num_elems_written_per_iteration));
-
- AccessWindowRectangle input_access(_input->info(), -conv_pad_left, -conv_pad_top,
- num_elems_read_per_iteration, 3,
- conv_stride_x, conv_stride_y);
- AccessWindowStatic weights_access(_weights->info(), 0, 0, 3, 3);
- AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, _run_optimized));
- update_window_and_padding(win, input_access, weights_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), _output->info()->tensor_shape()));
+ _num_elems_written_per_iteration = 16 >> _conv_info.stride().first;
+ _border_size = BorderSize(_conv_info.pad_top(), _conv_info.pad_right(), _conv_info.pad_bottom(), _conv_info.pad_left());
- INEKernel::configure(win);
+ auto win_config = validate_and_configure_window(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, false);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
}
void NEDepthwiseConvolutionLayer3x3Kernel::configure_optimized()
{
- ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(1) != 3 || _weights->info()->dimension(2) != 3);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, _run_optimized));
_border_size = BorderSize(0, 0);
_convolver = create_convolver_object(_conv_info, _weights, _input, _output);
- // Auto-configure output
- bool same_padding = _conv_info.has_padding();
- TensorShape output_shape{ _input->info()->tensor_shape() };
-
- output_shape.set(1, _convolver->output_size(output_shape.y(), same_padding)); // Set width
- output_shape.set(2, _convolver->output_size(output_shape.z(), same_padding)); // Set height
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*_output->info(),
- _input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
-
- // Set padding in channels
- const int num_channels = _weights->info()->dimension(0);
- if((num_channels >= 128) && (num_channels % 16 == 0))
- {
- _input->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- _weights->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- _output->info()->extend_padding(PaddingSize(0, 4, 0, 0));
- }
-
- // Configure window
- Window win;
- auto win_last = _convolver->get_window();
- win.set(Window::DimX, Window::Dimension(0, win_last, 1));
- INEKernel::configure(win);
+ auto win_config = validate_and_configure_window(_input->info(), _weights->info(), _output->info(), _conv_info, _depth_multiplier, true, _convolver.get());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
}
void NEDepthwiseConvolutionLayer3x3Kernel::run_generic(const Window &window, const ThreadInfo &info)
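
Note the idiom used by validate() above (and mirrored in the GEMV kernel later in this patch): validate_and_configure_window() may auto-initialise the output and extend paddings, so the static check runs it on clones to keep the caller's ITensorInfo objects untouched:

    // validate() must be side-effect free, so the window logic works on copies:
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
                                                              weights->clone().get(),
                                                              output->clone().get(),
                                                              conv_info, depth_multiplier, is_optimized).first);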
diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
index 5b43e2b14f..91b29cdf03 100644
--- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
@@ -37,6 +37,21 @@
using namespace arm_compute;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_UNUSED(conv_info);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && has_bias);
+ ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(2) * depth_multiplier) != output->dimension(2));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0)));
+
+ return Status{};
+}
+} // namespace
+
template <typename T>
void NEDepthwiseIm2ColKernel::run_generic(const Window &window)
{
@@ -120,11 +135,9 @@ NEDepthwiseIm2ColKernel::NEDepthwiseIm2ColKernel()
void NEDepthwiseIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && has_bias);
- ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != output->info()->dimension(2));
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0)));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, depth_multiplier));
_input = input;
_output = output;
@@ -158,6 +171,13 @@ void NEDepthwiseIm2ColKernel::configure(const ITensor *input, ITensor *output, c
INEKernel::configure(win);
}
+Status NEDepthwiseIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, depth_multiplier));
+ return Status{};
+}
+
void NEDepthwiseIm2ColKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
index 86a6d1c1a8..fe141bef56 100644
--- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
@@ -34,8 +34,27 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
+
+ if(output->total_size() != 0)
+ {
+ TensorShape output_shape = compute_vector_to_tensor_output_shape(input->tensor_shape(), conv_w, conv_h, output->data_layout());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ }
+
+ return Status{};
+}
+} // namespace
template <typename T>
void NEDepthwiseVectorToTensorKernel::vector_to_tensor(const Window &window)
@@ -76,19 +95,13 @@ NEDepthwiseVectorToTensorKernel::NEDepthwiseVectorToTensorKernel()
void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- TensorShape output_shape = input->info()->tensor_shape();
- output_shape.set(0, conv_w);
- output_shape.set(1, conv_h);
- output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Output auto initialization if not yet initialized
+ TensorShape output_shape = compute_vector_to_tensor_output_shape(input->info()->tensor_shape(), conv_w, conv_h, output->info()->data_layout());
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), conv_w, conv_h));
_input = input;
_output = output;
@@ -121,6 +134,13 @@ void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *o
INEKernel::configure(win);
}
+Status NEDepthwiseVectorToTensorKernel::validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, conv_w, conv_h));
+ return Status{};
+}
+
void NEDepthwiseVectorToTensorKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
diff --git a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
index 47fcf12874..2c7a379c25 100644
--- a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
@@ -77,6 +77,24 @@ void weights_reshape(const ITensor *input, const ITensor *bias, ITensor *output,
},
in, out);
}
+
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type()) && (biases != nullptr));
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(0) * input->dimension(1) + ((biases != nullptr) ? 1 : 0)));
+
+ if(biases != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != input->dimension(2));
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ }
+
+ return Status{};
+}
} // namespace
NEDepthwiseWeightsReshapeKernel::NEDepthwiseWeightsReshapeKernel()
@@ -86,18 +104,9 @@ NEDepthwiseWeightsReshapeKernel::NEDepthwiseWeightsReshapeKernel()
void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *output, const ITensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && (biases != nullptr));
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(1));
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) + ((biases != nullptr) ? 1 : 0)));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- if(biases != nullptr)
- {
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != input->info()->dimension(2));
- ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
- }
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), (biases != nullptr) ? biases->info() : nullptr));
_input = input;
_output = output;
@@ -135,6 +144,13 @@ void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *o
INEKernel::configure(win);
}
+Status NEDepthwiseWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, biases));
+ return Status{};
+}
+
void NEDepthwiseWeightsReshapeKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
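
The reshape contract checked above, in compact form: the output packs each [kernel_x, kernel_y] plane (plus an optional bias element) into one row per channel. A small example that satisfies validate_arguments() (illustrative shapes):

    // Weights [3, 3, 16] with F32 biases [16] reshape to [3*3 + 1, 16] = [10, 16]:
    const TensorInfo weights(TensorShape(3U, 3U, 16U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(16U), 1, DataType::F32);
    const TensorInfo reshaped(TensorShape(10U, 16U), 1, DataType::F32);
    const Status s = NEDepthwiseWeightsReshapeKernel::validate(&weights, &reshaped, &biases);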
diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
index 8588f43edf..238786953b 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
@@ -39,6 +39,43 @@
using namespace arm_compute;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input0->data_type()) && (output->data_type() != DataType::S32));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_float(input0->data_type()) && (output->data_type() != DataType::F32));
+
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->num_dimensions() == input1->num_dimensions());
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(2) != input1->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(DataLayoutDimension::HEIGHT) != output->dimension(DataLayoutDimension::HEIGHT));
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(DataLayoutDimension::WIDTH) != output->dimension(DataLayoutDimension::WIDTH));
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
+{
+ const unsigned int num_elems_read_per_iteration = 16 / input0->element_size();
+
+ Window win = calculate_max_window(*input0, Steps(num_elems_read_per_iteration));
+
+ AccessWindowHorizontal input0_access(input0, 0, num_elems_read_per_iteration);
+ AccessWindowHorizontal input1_access(input1, 0, num_elems_read_per_iteration);
+ AccessWindowStatic output_access(output, 0, 0, output->dimension(0), output->dimension(1));
+
+ bool window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);
+
+ output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
template <typename I0, typename I1, typename O>
void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiply(const Window &window_in, const Window &window_w, const Window &window_out)
{
@@ -175,10 +212,9 @@ BorderSize NEGEMMMatrixVectorMultiplyKernel::border_size() const
void NEGEMMMatrixVectorMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input0->info()->data_type()) && (output->info()->data_type() != DataType::S32));
- ARM_COMPUTE_ERROR_ON(input0->info()->dimension(2) != input1->info()->dimension(1));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
_input0 = input0;
_input1 = input1;
@@ -203,17 +239,17 @@ void NEGEMMMatrixVectorMultiplyKernel::configure(const ITensor *input0, const IT
const unsigned int border_x = ceil_to_multiple(input0->info()->dimension(0), num_elems_read_per_iteration) - input0->info()->dimension(0);
_border_size = BorderSize(0, border_x);
- Window win = calculate_max_window(*input0->info(), Steps(num_elems_read_per_iteration));
-
- AccessWindowHorizontal input0_access(input0->info(), 0, num_elems_read_per_iteration);
- AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_read_per_iteration);
- AccessWindowStatic output_access(output->info(), 0, 0, output->info()->dimension(0), output->info()->dimension(1));
-
- update_window_and_padding(win, input0_access, input1_access, output_access);
-
- _output->info()->set_valid_region(ValidRegion(Coordinates(), _output->info()->tensor_shape()));
+ auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
- INEKernel::configure(win);
+Status NEGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()).first);
+ return Status{};
}
void NEGEMMMatrixVectorMultiplyKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 1d65dde2a6..3b54ed62c7 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -123,6 +123,16 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
}
}
+Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_UNUSED(biases);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC);
+
+ return NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, output, conv_info, depth_multiplier);
+}
+
void NEDepthwiseConvolutionLayer3x3::run()
{
if(_is_first_run && _is_optimized)
@@ -263,6 +273,58 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
_v2mm_output.allocator()->allocate();
}
+Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC);
+
+ const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
+ const bool append_bias = (biases != nullptr) && !is_quantized;
+ const TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
+ const size_t weights_w = weights->dimension(0);
+ const size_t weights_h = weights->dimension(1);
+ const size_t weights_z = weights->dimension(2);
+ const unsigned int conv_w = output_shape.x();
+ const unsigned int conv_h = output_shape.y();
+ const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
+ const size_t conv_size = conv_w * conv_h;
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+
+ // Im2Col configuration
+ TensorShape shape_im2col = input->tensor_shape();
+ shape_im2col.set(0, patch_size);
+ shape_im2col.set(1, conv_size);
+ shape_im2col.set(2, weights_z);
+ TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier));
+
+ // Weights reshape configuration
+ const TensorShape shape_weights_reshape(patch_size, weights_z);
+ TensorInfo weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights, &weights_reshaped, append_bias ? biases : nullptr));
+
+ // GEMV configuration
+ DataType v2mm_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
+ TensorShape shape_v2mm_out = input->tensor_shape();
+ shape_v2mm_out.set(0, conv_size * weights_z);
+ shape_v2mm_out.set(1, 1);
+ shape_v2mm_out.set(2, 1);
+ TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));
+
+ TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output, conv_w, conv_h));
+
+ if(is_quantized)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output));
+ }
+
+ return Status{};
+}
+
void NEDepthwiseConvolutionLayer::run()
{
prepare();
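
To make the chained validation above concrete, here is the shape flow for the F32 generic case from the new tests (input [27, 13, 8], 3x3 kernel, stride 1, no padding, depth multiplier 2, with bias), a worked example rather than additional checks:

    // output_shape          = [25, 11, 16]   (conv_w, conv_h, IFM * depth_multiplier)
    // shape_im2col          = [10, 275, 16]  (patch_size = 3*3 + 1 bias, conv_size = 25*11, weights_z)
    // shape_weights_reshape = [10, 16]       (patch_size, weights_z)
    // shape_v2mm_out        = [4400, 1, 1]   (conv_size * weights_z)
    // vector-to-tensor      = [25, 11, 16]   (back to the convolved output shape)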
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index b1cc491ac8..956fd741df 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -54,56 +54,139 @@ const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1,
TEST_SUITE(NEON)
TEST_SUITE(DepthwiseConvLayer)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- datasets::LargeDepthwiseConvolutionLayerDataset3x3()),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::F32)),
- input_shape, kernel_size, info, depth_multiplier, data_type)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Unsupported weights dimensions
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching depth multiplier
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid stride
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+ }),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ })),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ })),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ })),
+ framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(4, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ })),
+ framework::dataset::make("DepthMultiplier", { 1,
+ 1,
+ 1,
+ 3,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })),
+ input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, expected)
{
- // Get shapes
- TensorShape weights_shape(kernel_size.width, kernel_size.height);
-
- const TensorInfo in_info(input_shape, 1, data_type);
- const TensorInfo we_info(weights_shape, 1, data_type);
- const TensorShape output_shape = compute_depthwise_convolution_shape(in_info, we_info, info, depth_multiplier);
-
- weights_shape.set(2, output_shape.z());
-
- // Create tensors
- Tensor src = create_tensor<Tensor>(input_shape, data_type);
- Tensor dst = create_tensor<Tensor>(output_shape, data_type);
- Tensor weights = create_tensor<Tensor>(weights_shape, data_type);
- const TensorShape bias_shape(weights_shape[2]);
- Tensor bias = create_tensor<Tensor>(bias_shape, data_type);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Create and configure function
- NEDepthwiseConvolutionLayer3x3 depthwise_layer;
- depthwise_layer.configure(&src, &weights, &bias, &dst, info, depth_multiplier);
-
- // Validate valid region
- const ValidRegion input_valid_region = shape_to_valid_region(input_shape);
- const ValidRegion output_valid_region = shape_to_valid_region(output_shape);
- const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
- const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape);
-
- validate(src.info()->valid_region(), input_valid_region);
- validate(dst.info()->valid_region(), output_valid_region);
- validate(weights.info()->valid_region(), weights_valid_region);
- validate(bias.info()->valid_region(), bias_valid_region);
+ bool is_valid = bool(NEDepthwiseConvolutionLayer3x3::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
- // Validate padding
- bool is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, depth_multiplier, DataLayout::NCHW);
- const int step_non_opt_dwc = 16 >> info.stride().first;
- const int step_bias_add = 16 / src.info()->element_size();
- const int step = is_optimized_run ? step_bias_add : std::max(step_non_opt_dwc, step_bias_add);
- const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding();
- validate(dst.info()->padding(), padding);
+DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
+ TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
+ TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
+ }),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
+ })),
+ framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(4U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(2U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(24U), 1, DataType::S32),
+ })),
+ framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
+ })),
+ framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 1, 0),
+ })),
+ framework::dataset::make("DepthMultiplier", { 1,
+ 1,
+ 3,
+ 1,
+ 1,
+ 1,
+ 2,
+ 3,
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, false, true, true })),
+ input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, expected)
+{
+ bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
+// clang-format on
+// *INDENT-ON*
TEST_SUITE(Float)
TEST_SUITE(F32)