about summary refs log tree commit diff
path: root/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/Types.h 6
-rw-r--r-- arm_compute/core/utils/misc/ShapeCalculator.h 24
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h 49
3 files changed, 23 insertions, 56 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index b1f340d18e..b5fd21d29d 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1874,12 +1874,6 @@ struct ConvolutionInfo
Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
};
-struct DepthwiseConvolutionReshapeInfo
-{
- unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */
- bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter i.e. KwxKh) has to be transposed */
-};
-
/** GEMMLowp output stage type */
enum class GEMMLowpOutputStageType
{
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index ba37f9a61e..8e49c068af 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -287,30 +287,6 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte
return shape_interleaved_a;
}
-/** Calculate the reshaped shape of the weights to use in depthwise convolution
- *
- * @param[in] input Input tensor info
- * @param[in] info Depthwise convolution information to be used for reshaping.
- *
- * @return the calculated shape
- */
-inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info)
-{
- const auto data_layout = input.data_layout();
- TensorShape weights_shape{};
-
- const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const size_t num_channels = input.dimension(channel_idx);
- const size_t num_rows = input.dimension(height_idx);
- const size_t num_cols = input.dimension(width_idx);
-
- weights_shape.set(0, num_rows * num_cols * info.c0);
- weights_shape.set(1, DIV_CEIL(num_channels, info.c0));
- return weights_shape;
-}
-
/** Calculate the transposed 1xW shape
*
* @param[in] b Input tensor info
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index e2c5d683cf..1af9e1dc6f 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -35,8 +35,8 @@ namespace arm_compute
class CLCompileContext;
class CLFillBorderKernel;
class CLDepthwiseConvolutionLayerNativeKernel;
-class CLDepthwiseConvolutionLayerReshapeWeightsKernel;
-class ICLDepthwiseConvolutionLayer3x3Kernel;
+class CLDepthwiseConvolutionLayer3x3NCHWKernel;
+class CLDepthwiseConvolutionLayer3x3NHWCKernel;
class ICLTensor;
/** Function to execute a depthwise convolution
@@ -123,19 +123,17 @@ private:
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
*
* @return a Depthwise Convolution Function
*/
static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
*
* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
- * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
*
*/
@@ -200,7 +198,7 @@ private:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overridden:
void run() override;
@@ -212,26 +210,25 @@ private:
};
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- std::unique_ptr<CLFillBorderKernel> _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- CLTensor _output_multipliers;
- CLTensor _output_shifts;
- const ITensor *_original_weights;
- const ITensor *_input;
- const ITensor *_output;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_nhwc;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw;
+ std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ CLTensor _output_multipliers;
+ CLTensor _output_shifts;
+ const ITensor *_original_weights;
+ const ITensor *_input;
+ const ITensor *_output;
+ bool _needs_permute;
+ bool _is_prepared;
+ bool _is_quantized;
+ bool _is_nhwc;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: