about summary refs log tree commit diff
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2021-07-28 14:10:47 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-28 16:31:53 +0000
commit a86433a64ab25c2ea8e274bd2f357a9709636f5b (patch)
tree 48978522014f8d0423522ec37f2d491bee110944
parent 120c87b1622fa2472d27e43e5b7d861fb9735d9c (diff)
download ComputeLibrary-a86433a64ab25c2ea8e274bd2f357a9709636f5b.tar.gz
Reduce binary footprint of CpuConvertFullyConnectedWeightsKernel
Binary size reduction for this kernel is almost 50%.
Also remove unused NEConvertFullyConnectedWeightsManaged.

Change-Id: Ia46a1342a0737397b4aac2578d963c2ebb7446e3
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6011
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h 45
-rw-r--r-- src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp 41
-rw-r--r-- src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h 9
-rw-r--r-- src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp 1
4 files changed, 15 insertions, 81 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 218877d421..a892d3036b 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -25,13 +25,14 @@
#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/ITransformWeights.h"
-#include "arm_compute/runtime/Tensor.h"
+
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel. */
class NEConvertFullyConnectedWeights : public IFunction
@@ -84,45 +85,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace weights_transformations
-{
-/** Basic function to manage @ref NEConvertFullyConnectedWeights. */
-class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
-{
-public:
- void run() override
- {
- _output.allocator()->allocate();
- _func.run();
- _reshape_run = true;
- }
-
- void release() override
- {
- _output.allocator()->free();
- }
-
- ITensor *get_weights() override
- {
- return &_output;
- }
-
- uint32_t uid() override
- {
- return _uid;
- }
-
- void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
- {
- _func.configure(input, &_output, original_input_shape, data_layout);
- }
-
-private:
- static constexpr uint32_t _uid = 0x4;
- Tensor _output{};
- NEConvertFullyConnectedWeights _func{};
-};
-} // namespace weights_transformations
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
index 5bf70dc9bf..5406356bc9 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
@@ -81,22 +81,6 @@ Status CpuConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *src, c
return Status{};
}
-template <typename T>
-void CpuConvertFullyConnectedWeightsKernel::run_convert_fc_weights(const ITensor *in, ITensor *out, const Window &window)
-{
- const unsigned int dst_stride_x = out->info()->strides_in_bytes().x();
- const unsigned int dst_stride_y = out->info()->strides_in_bytes().y();
-
- Iterator input(in, window);
- Iterator output(out, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- *reinterpret_cast<T *>(output.ptr() + id.x() * dst_stride_x + (id.y() % _factor1 * _factor2 + id.y() / _factor1) * dst_stride_y) = *reinterpret_cast<T *>(input.ptr());
- },
- input);
-}
-
void CpuConvertFullyConnectedWeightsKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -106,21 +90,18 @@ void CpuConvertFullyConnectedWeightsKernel::run_op(ITensorPack &tensors, const W
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
- switch(src->info()->element_size())
+ const unsigned int dst_stride_x = dst->info()->strides_in_bytes().x();
+ const unsigned int dst_stride_y = dst->info()->strides_in_bytes().y();
+ const unsigned int element_size = src->info()->element_size();
+
+ Iterator input(src, window);
+ Iterator output(dst, window);
+
+ execute_window_loop(window, [&](const Coordinates & id)
{
- case 1:
- run_convert_fc_weights<uint8_t>(src, dst, window);
- break;
- case 2:
- run_convert_fc_weights<uint16_t>(src, dst, window);
- break;
- case 4:
- run_convert_fc_weights<uint32_t>(src, dst, window);
- break;
- default:
- ARM_COMPUTE_ERROR("Data type not supported.");
- break;
- }
+ memcpy(output.ptr() + id.x() * dst_stride_x + (id.y() % _factor1 * _factor2 + id.y() / _factor1) * dst_stride_y, input.ptr(), element_size);
+ },
+ input);
}
const char *CpuConvertFullyConnectedWeightsKernel::name() const
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index 3ba3162c34..7baaf13417 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -69,15 +69,6 @@ public:
private:
unsigned int _factor1{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NCHW; its number of channels otherwise */
unsigned int _factor2{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NHWC; its number of channels otherwise */
-
- /** Template function to run the permute
- *
- * @param[in] in Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] out The converted weights tensor info. Shape and Data Type: Same as @p in.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_convert_fc_weights(const ITensor *in, ITensor *out, const Window &window);
};
} // namespace kernels
} // namespace cpu
diff --git a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
index f2253d8be4..1f6b3c94e2 100644
--- a/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
+++ b/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
+#include "arm_compute/core/Validate.h"
#include "src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.h"
namespace arm_compute