aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2018-04-04 17:44:26 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:50:48 +0000
commit7657224de2b697a8a92cccf26d98e53ccd7c1a03 (patch)
tree1dcfa4541dbaf753854a628c93991652158d373e /arm_compute/core
parente74b201ca1abca040ca9f30837fdf19aa610e7c4 (diff)
downloadComputeLibrary-7657224de2b697a8a92cccf26d98e53ccd7c1a03.tar.gz
COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution
Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r--arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h17
-rw-r--r--arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h34
-rw-r--r--arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h19
-rw-r--r--arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h17
-rw-r--r--arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h15
-rw-r--r--arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h25
-rw-r--r--arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h18
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h4
8 files changed, 81 insertions, 68 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
index 0f3f4bfc76..f80985a936 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
@@ -39,15 +39,16 @@ public:
CLDepthwiseConvolutionLayer3x3NCHWKernel();
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
ActivationLayerInfo act_info) override;
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index 4ecc07af6a..62b9a8682e 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -40,29 +40,31 @@ public:
/** Default move assignment operator. */
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: QASYMM8.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] input Source tensor. DataType supported: QASYMM8.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
ActivationLayerInfo act_info) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
*
- * @param[in] input Source tensor. DataType supported: QASYMM8.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[in] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] input Source tensor. DataType supported: QASYMM8.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
ActivationLayerInfo act_info = ActivationLayerInfo());
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
index 86b8b90e94..b8343da50a 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
@@ -33,7 +33,7 @@ class ICLTensor;
/** Interface for the depthwise im2col reshape kernel.
* This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is
- * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged .
+ * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches per image and third dimension unchanged .
**/
class CLDepthwiseIm2ColKernel : public ICLKernel
{
@@ -50,15 +50,16 @@ public:
CLDepthwiseIm2ColKernel &operator=(CLDepthwiseIm2ColKernel &&) = default;
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32
- * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32
+ * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
+ * while every dimension above 3 represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
*/
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false);
+ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
index f02ba331b5..15233c5c32 100644
--- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
@@ -50,15 +50,16 @@ public:
ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
*/
- virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
ActivationLayerInfo act_info = ActivationLayerInfo()) = 0;
protected:
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
index 46b9e897bb..7d0d80649e 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
@@ -47,14 +47,15 @@ public:
GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: F16.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] input Source tensor. DataType supported: F16.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
*/
- void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info);
+ void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
// Inherited methods overridden:
void run(const Window &window) override;
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
index 0c2f30a98c..bd9e7eb781 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -53,23 +53,25 @@ public:
NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: QASYMM8, F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] data_layout (Optional) Data layout of the input and weights tensor
+ * @param[in] input Source tensor. DataType supported: QASYMM8, F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] data_layout (Optional) Data layout of the input and weights tensor
*/
- void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, DataLayout data_layout = DataLayout::NCHW);
+ void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW);
/** Static method that checks if optimized execution is supported for the given parameters
*
- * @param[in] input_shape Input shape
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] dt Data type of the input and weights
- * @param[in] data_layout (Optional) Data layout of the input and weights tensor
+ * @param[in] input_shape Input shape
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] dt Data type of the input and weights
+ * @param[in] data_layout (Optional) Data layout of the input and weights tensor
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
*
* @return True if the optimized kernels can be executed else false
*/
- static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, DataLayout data_layout = DataLayout::NCHW);
+ static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW);
/** Generates the convolver object */
void generate_convolver();
@@ -110,6 +112,7 @@ private:
std::unique_ptr<depthwise::IDepthwiseConvolution> _convolver;
unsigned int _num_elems_written_per_iteration;
bool _run_optimized;
+ unsigned int _depth_multiplier;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
index ca10bfaab2..9c11cfa425 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h
@@ -54,15 +54,16 @@ public:
NEDepthwiseIm2ColKernel &operator=(NEDepthwiseIm2ColKernel &&) = default;
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32
- * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
- * while every dimension above 3 represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32
+ * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
+ * while every dimension above 3 represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
*/
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false);
+ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -87,6 +88,7 @@ private:
Size2D _kernel_dims;
PadStrideInfo _conv_info;
bool _has_bias;
+ unsigned int _depth_multiplier;
};
} // arm_compute
#endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 7d07d4619b..a0bc4eab54 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -125,7 +125,7 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input)
return shape_transposed;
}
-inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
+inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier)
{
const TensorShape input_shape{ input.tensor_shape() };
const TensorShape weights_shape{ weights.tensor_shape() };
@@ -133,6 +133,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
const DataLayout data_layout = input.data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
unsigned int output_width = 0;
unsigned int output_height = 0;
@@ -143,6 +144,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
TensorShape output_shape{ input_shape };
output_shape.set(width_idx, output_width);
output_shape.set(height_idx, output_height);
+ output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier);
return output_shape;
}