aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
diff options
context:
space:
mode:
author Abe Mbise <abe.mbise@arm.com> 2018-05-31 16:48:41 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit 7784c837afd5844fb6dc4d166ff253d983abfd2d (patch)
tree 3bc770240de148d565aa828e8f3471c354ac3837 /arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
parent b03f7c5c780fe2df23eb8c5c1b4b1d65bd7f0339 (diff)
download ComputeLibrary-7784c837afd5844fb6dc4d166ff253d983abfd2d.tar.gz
COMPMID-1167: Validation for NEDepthwiseConvolutionLayer
Change-Id: I9689e1a0627dc015dd2ce98417e4c97bb55581bb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131327
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h')
-rw-r--r-- arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h 21
1 files changed, 19 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
index bd9e7eb781..3ffafd858f 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -53,8 +53,10 @@ public:
NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
+ * @note Supported data layouts: NCHW and NHWC
+ *
* @param[in] input Source tensor. DataType supported: QASYMM8, F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: Same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
@@ -66,8 +68,8 @@ public:
* @param[in] input_shape Input shape
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] dt Data type of the input and weights
- * @param[in] data_layout (Optional) Data layout of the input and weights tensor
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] data_layout (Optional) Data layout of the input and weights tensor
*
* @return True if the optimized kernels can be executed else false
*/
@@ -75,6 +77,20 @@ public:
/** Generates the convolver object */
void generate_convolver();
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3Kernel
+ *
+ * @note Supported data layouts: NCHW and NHWC
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8, F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
BorderSize border_size() const override;
@@ -82,6 +98,7 @@ public:
private:
void configure_generic();
void configure_optimized();
+
void run_generic(const Window &window, const ThreadInfo &info);
void run_optimized(const Window &window, const ThreadInfo &info);
/** Creates an optimized backend convolver object