aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp16
1 files changed, 13 insertions, 3 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index a02b84fba1..a54e92c63a 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -69,7 +69,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &conv_info)
{
const unsigned int num_rows_processed_per_iteration = 4;
const unsigned int num_elems_accessed_per_iteration = 4;
@@ -88,6 +89,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
+ if(bias != nullptr)
+ {
+ AccessWindowHorizontal bias_access(bias, 0, num_elems_accessed_per_iteration);
+ window_changed = window_changed || update_window_and_padding(win, bias_access);
+ }
+
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
@@ -191,7 +198,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info);
+ auto win_config = validate_and_configure_window(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
@@ -214,7 +221,10 @@ Status CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(const ITensorInfo *inp
ActivationLayerInfo act_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(),
+ biases != nullptr ? biases->clone().get() : nullptr,
+ output->clone().get(), conv_info)
+ .first);
return Status{};
}