aboutsummaryrefslogtreecommitdiff
path: root/src/core/cpu
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2021-07-28 14:10:47 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-28 16:31:53 +0000
commit a86433a64ab25c2ea8e274bd2f357a9709636f5b (patch)
tree 48978522014f8d0423522ec37f2d491bee110944 /src/core/cpu
parent 120c87b1622fa2472d27e43e5b7d861fb9735d9c (diff)
download ComputeLibrary-a86433a64ab25c2ea8e274bd2f357a9709636f5b.tar.gz
Reduce binary footprint of CpuConvertFullyConnectedWeightsKernel
Binary size reduction for this kernel is almost 50%.
Also remove unused NEConvertFullyConnectedWeightsManaged.

Change-Id: Ia46a1342a0737397b4aac2578d963c2ebb7446e3
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6011
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu')
-rw-r--r-- src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp 41
-rw-r--r-- src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h 9
2 files changed, 11 insertions, 39 deletions
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
index 5bf70dc9bf..5406356bc9 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
@@ -81,22 +81,6 @@ Status CpuConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *src, c
return Status{};
}
-template <typename T>
-void CpuConvertFullyConnectedWeightsKernel::run_convert_fc_weights(const ITensor *in, ITensor *out, const Window &window)
-{
- const unsigned int dst_stride_x = out->info()->strides_in_bytes().x();
- const unsigned int dst_stride_y = out->info()->strides_in_bytes().y();
-
- Iterator input(in, window);
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- *reinterpret_cast<T *>(output.ptr() + id.x() * dst_stride_x + (id.y() % _factor1 * _factor2 + id.y() / _factor1) * dst_stride_y) = *reinterpret_cast<T *>(input.ptr());
- },
- input);
-}
-
void CpuConvertFullyConnectedWeightsKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -106,21 +90,18 @@ void CpuConvertFullyConnectedWeightsKernel::run_op(ITensorPack &tensors, const W
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
- switch(src->info()->element_size())
+ const unsigned int dst_stride_x = dst->info()->strides_in_bytes().x();
+ const unsigned int dst_stride_y = dst->info()->strides_in_bytes().y();
+ const unsigned int element_size = src->info()->element_size();
+
+ Iterator input(src, window);
+ Iterator output(dst, window);
+
+ execute_window_loop(window, [&](const Coordinates & id)
{
- case 1:
- run_convert_fc_weights<uint8_t>(src, dst, window);
- break;
- case 2:
- run_convert_fc_weights<uint16_t>(src, dst, window);
- break;
- case 4:
- run_convert_fc_weights<uint32_t>(src, dst, window);
- break;
- default:
- ARM_COMPUTE_ERROR("Data type not supported.");
- break;
- }
+ memcpy(output.ptr() + id.x() * dst_stride_x + (id.y() % _factor1 * _factor2 + id.y() / _factor1) * dst_stride_y, input.ptr(), element_size);
+ },
+ input);
}
const char *CpuConvertFullyConnectedWeightsKernel::name() const
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index 3ba3162c34..7baaf13417 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -69,15 +69,6 @@ public:
private:
unsigned int _factor1{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NCHW; its number of channels otherwise */
unsigned int _factor2{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NHWC; its number of channels otherwise */
-
- /** Template function to run the permute
- *
- * @param[in] in Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] out The converted weights tensor info. Shape and Data Type: Same as @p in.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_convert_fc_weights(const ITensor *in, ITensor *out, const Window &window);
};
} // namespace kernels
} // namespace cpu