diff options
author | Michael Tyler <michael.tyler@arm.com> | 2023-06-30 11:26:05 +0100 |
---|---|---|
committer | michael.tyler <michael.tyler@arm.com> | 2023-07-04 14:34:58 +0000 |
commit | 8deee9bd9b9137c256c23b86be11dbf0466f3aa8 (patch) | |
tree | ac80b3bdd992552b65e306b77f061484da0591ca /src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp | |
parent | 19844f605f5e5b71d05164711dee13f8652adafe (diff) | |
download | ComputeLibrary-8deee9bd9b9137c256c23b86be11dbf0466f3aa8.tar.gz |
Depthwise channel pre-multiplication
Resolves: COMPMID-6337
Change-Id: Ie9097b3f56e8071426c621386a5988bd7f7e8ef2
Signed-off-by: Michael Tyler <michael.tyler@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9852
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp | 32 |
1 files changed, 31 insertions, 1 deletions
```diff
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp b/src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp
index b1fe66cea2..9805fd354f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/working_space.hpp
@@ -217,7 +217,7 @@ class InputBufferElement
   template <typename StratType, typename OutputStage>
   static size_t get_element_size(const WorkspaceArgs<StratType, OutputStage> &args)
   {
-    return sizeof(T) * args.depthwise_args.input_channels;
+    return sizeof(T) * args.depthwise_args.input_channels * args.depthwise_args.channel_multiplier;
   }
 
   template <class WorkspaceType, typename StratType, typename OutputStage>
@@ -278,6 +278,36 @@ class OutputArrayElement
 };
 
 
+/* Intermediate array to store results of premultiplication.
+ * Used as input to the kernel instead of the original input array.
+ */
+template <typename T>
+class IntermediateBufferElement
+{
+public:
+  struct Workspace
+  {
+    T *intermediate_buffer;
+  };
+
+  template <typename StratType, typename OutputStage>
+  static size_t get_element_size(const WorkspaceArgs<StratType, OutputStage> &args)
+  {
+    auto cols = args.depthwise_args.input_cols + args.depthwise_args.kernel_cols;
+    auto rows = args.strategy->get_input_rows() + args.depthwise_args.kernel_rows;
+    auto channels = args.depthwise_args.input_channels * args.depthwise_args.channel_multiplier;
+    return sizeof(T) * cols * rows * channels;
+  }
+
+  template <class WorkspaceType, typename StratType, typename OutputStage>
+  static void *initialise(WorkspaceType *ws, void *buffer, const WorkspaceArgs<StratType, OutputStage> &args)
+  {
+    ws->intermediate_buffer = reinterpret_cast<T*>(buffer);
+    return reinterpret_cast<char *>(buffer) + get_element_size(args);
+  }
+};
+
+
 /* Container for requantization parameters.
  *
  * This removes the distinction between per-layer and per-channel
```