Diffstat (limited to 'src/cpu')
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.cpp                 | 22
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.h                   |  3
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp |  9
3 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp
index c93ffb113d..f69ecdc5bf 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI
_is_nchw = src->data_layout() == DataLayout::NCHW;
_permute = _is_nchw;
_is_prepared = false;
+ _are_weights_const = weights->are_values_constant();
// Configure pipeline
_is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
@@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t
void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
{
+ // If the weights are not constant, the caller may update them in-place,
+ // so repack them on every prepare() call
+ if(!_are_weights_const)
+ {
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+ ITensorPack pack_opt;
+ pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
+ pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+ // Prepare optimized function
+ _dwc_optimized_func->prepare(pack_opt);
+
+ return;
+ }
+
if(!_is_prepared)
{
auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
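
The hunk above makes prepare() skip the cached one-shot path when the weights are mutable: it rebuilds a tensor pack from the (possibly updated) weights, bias and the packed-weights workspace, and re-runs the assembly dispatch's prepare() so the packed buffer is refreshed. The following standalone C++ sketch (not ACL code; the class and helper names are invented for illustration) shows the pack-once versus repack-every-time behaviour this enables:

#include <cstdio>
#include <vector>

// Stand-in for the packed buffer the assembly kernel consumes (ACL_INT_1 above).
struct PackedWeights
{
    std::vector<float> data;
};

class DepthwiseConvSketch
{
public:
    void configure(bool weights_are_constant)
    {
        _are_weights_const = weights_are_constant;
        _is_prepared       = false;
    }

    void prepare(const std::vector<float> &weights)
    {
        // Mirror of the new logic: repack whenever the weights are mutable,
        // otherwise only on the first call.
        if(!_are_weights_const || !_is_prepared)
        {
            _packed.data = weights; // placeholder for the real packing routine
            _is_prepared = true;
            std::printf("packed %zu weight values, first value %.1f\n", _packed.data.size(), _packed.data[0]);
        }
    }

private:
    PackedWeights _packed{};
    bool          _is_prepared{ false };
    bool          _are_weights_const{ true };
};

int main()
{
    std::vector<float> weights{ 1.f, 2.f, 3.f };

    DepthwiseConvSketch conv;
    conv.configure(/* weights_are_constant = */ false);

    conv.prepare(weights); // first call: packs the initial values
    weights[0] = 42.f;     // caller updates the weights in-place
    conv.prepare(weights); // repacks because the weights are not constant
}

With constant weights the second prepare() call above would be a no-op, which matches the pre-existing behaviour of the operator.
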
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h
index 15e52ef515..3d8719ee44 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.h
+++ b/src/cpu/operators/CpuDepthwiseConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -143,6 +143,7 @@ private:
bool _permute{ false };
bool _is_activationlayer_enabled{ false };
bool _is_prepared{ false };
+ bool _are_weights_const{ true };
};
/** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
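
The new _are_weights_const member is seeded from weights->are_values_constant() at configure() time. As a hedged usage sketch of how a caller would opt into the repacking path (assuming TensorInfo exposes a set_are_values_constant() setter as the counterpart of the getter used in this patch; the shape, data type and layout below are arbitrary):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    // Arbitrary 3x3 depthwise weights with 16 channels, F32, NHWC.
    TensorInfo weights_info(TensorShape(16U, 3U, 3U), 1, DataType::F32);
    weights_info.set_data_layout(DataLayout::NHWC);

    // Assumption: set_are_values_constant() is available in this version of the
    // library. Marking the values as mutable makes _are_weights_const false, so
    // prepare() repacks the weights on every call.
    weights_info.set_are_values_constant(false);

    // Returns 0 when the flag was cleared as expected.
    return weights_info.are_values_constant() ? 1 : 0;
}
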
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index e75b082ca5..a5b9eca56e 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
{
std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr };
bool is_prepared{ false };
+ bool are_weights_const{ true };
experimental::MemoryRequirements mem_req{};
};
@@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
const CPUInfo &ci = NEScheduler::get().cpu_info();
const unsigned int num_threads = NEScheduler::get().num_threads();
_pImpl->is_prepared = false;
+ _pImpl->are_weights_const = weights->are_values_constant();
// If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info))
@@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors)
{
- if(!_pImpl->is_prepared)
+ const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+
+ if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared)
{
// Pack weights and bias
- const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
const ITensor *bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
ITensor *storage = tensors.get_tensor(TensorType::ACL_INT_1);
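
The dispatch-level change boils down to a widened guard: packing now also happens when the weights are mutable and actually present in the pack, not only on the first prepare(). A minimal standalone sketch of that condition (the helper name is invented for illustration):

#include <cassert>

// Mirrors the condition in CpuDepthwiseConv2dAssemblyDispatch::prepare().
static bool should_pack(bool are_weights_const, bool weights_in_pack, bool is_prepared)
{
    return (!are_weights_const && weights_in_pack) || !is_prepared;
}

int main()
{
    assert(should_pack(true, true, false));   // first prepare(): always pack
    assert(!should_pack(true, true, true));   // constant weights: pack only once
    assert(should_pack(false, true, true));   // mutable weights in the pack: repack every time
    assert(!should_pack(false, false, true)); // no weights supplied: nothing to repack
    return 0;
}
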