aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/assembly
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/assembly')
-rw-r--r-- src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h 88
-rw-r--r-- src/core/NEON/kernels/assembly/common.hpp 34
-rw-r--r-- src/core/NEON/kernels/assembly/depthwise.hpp 170
-rw-r--r-- src/core/NEON/kernels/assembly/depthwise_common.hpp 131
-rw-r--r-- src/core/NEON/kernels/assembly/pool_common.hpp 9
5 files changed, 336 insertions, 96 deletions
diff --git a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
deleted file mode 100644
index a956898403..0000000000
--- a/src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-#define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/NEON/INEKernel.h"
-
-#include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** This class is a wrapper for the depthwise convolution assembly kernels. */
-class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionAssemblyKernelWrapper";
- }
-
- /** Default constructor */
- NEDepthwiseConvolutionAssemblyKernelWrapper()
- : _kernel(nullptr)
- {
- }
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] kernel Pointer to an assembly kernel implementation.
- */
- void configure(depthwise::IDepthwiseConvolution *kernel)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
- _kernel = kernel;
- Window win;
- win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1));
- INEKernel::configure(win);
- }
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- auto first = window.x().start();
- auto last = window.x().end();
- _kernel->run(first, last, info.thread_id);
- }
-
-private:
- depthwise::IDepthwiseConvolution *_kernel;
-};
-} // namespace arm_compute
-#endif /* SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */
diff --git a/src/core/NEON/kernels/assembly/common.hpp b/src/core/NEON/kernels/assembly/common.hpp
new file mode 100644
index 0000000000..d82d11cae0
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/common.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+namespace arm_conv
+{
+struct PaddingValues // Aggregate of four unsigned padding amounts, one per side of a 2-D extent.
+{
+ unsigned int left, top, right, bottom; // units are not stated here — presumably elements/pixels; confirm with the users of this struct
+};
+
+} // namespace arm_conv
diff --git a/src/core/NEON/kernels/assembly/depthwise.hpp b/src/core/NEON/kernels/assembly/depthwise.hpp
new file mode 100644
index 0000000000..eadf48d003
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/depthwise.hpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "arm_gemm.hpp"
+#include "arm_gemm_local.hpp"
+#include "depthwise_common.hpp"
+
+namespace arm_conv
+{
+namespace depthwise
+{
+struct DepthwiseConfig // Configuration pairing a DepthwiseMethod with a filter string; DepthwiseArgs stores a pointer to one.
+{
+ DepthwiseMethod method = DepthwiseMethod::DEFAULT; // starts as DEFAULT unless the one-argument constructor overrides it
+ std::string filter = ""; // empty by default; how it is interpreted is not visible here — presumably a kernel-name filter, confirm
+
+ DepthwiseConfig(DepthwiseMethod method) // NOTE(review): not explicit, so a DepthwiseMethod converts implicitly to a DepthwiseConfig
+ : method(method) {}; // filter keeps its in-class "" default
+ DepthwiseConfig() {}; // both members keep their in-class defaults
+};
+
+struct DepthwiseArgs // Argument bundle taken by the depthwise entry points declared in this header; the constructor copies each argument into the same-named member.
+{
+ const CPUInfo *cpu_info; // raw pointer stored as given; this struct has no destructor and never frees it
+
+ unsigned int kernel_rows, kernel_cols; // kernel extent in rows and columns
+ unsigned int stride_rows, stride_cols; // stride in rows and columns
+
+ unsigned int n_batches, input_rows, input_cols, input_channels; // input extents; DepthwiseCommon derives its default input strides from these
+ unsigned int output_rows, output_cols; // output spatial extents
+ unsigned int channel_multiplier; // DepthwiseCommon uses input_channels * channel_multiplier as the default output column stride
+
+ PaddingValues padding; // stored by value
+
+ arm_gemm::Activation activation; // stored by value; its semantics are defined by arm_gemm, not in this header
+
+ const DepthwiseConfig *config; // raw pointer stored as given; whether nullptr is allowed is not visible here — confirm with the callers
+
+ DepthwiseArgs( // Member-wise constructor: every parameter initialises the member of the same name, nothing is validated.
+ const CPUInfo *cpu_info,
+ unsigned int kernel_rows, unsigned int kernel_cols,
+ unsigned int stride_rows, unsigned int stride_cols,
+ unsigned int n_batches, unsigned int input_rows, unsigned int input_cols,
+ unsigned int input_channels,
+ unsigned int output_rows, unsigned int output_cols,
+ unsigned int channel_multiplier,
+ PaddingValues padding, arm_gemm::Activation activation,
+ const DepthwiseConfig *config)
+ : cpu_info(cpu_info), kernel_rows(kernel_rows), kernel_cols(kernel_cols), stride_rows(stride_rows), stride_cols(stride_cols), n_batches(n_batches), input_rows(input_rows), input_cols(input_cols),
+ input_channels(input_channels), output_rows(output_rows), output_cols(output_cols), channel_multiplier(channel_multiplier), padding(padding), activation(activation), config(config)
+ {
+ }
+};
+
+template <typename TInput, typename TWeight, typename TOutput> // NOTE: none of the three type parameters is referenced inside this class body
+class DepthwiseCommon : public IDepthwiseCommon // Keeps a copy of the arguments and implements the two shorter execute() overloads by forwarding to the fully specified one.
+{
+protected:
+ const DepthwiseArgs m_args; // Copy of the arguments passed to the constructor; const, so it cannot change afterwards
+
+public:
+ DepthwiseCommon(const DepthwiseArgs &args) // copies args into m_args; NOTE(review): not explicit
+ : m_args(args) {};
+ DepthwiseCommon(DepthwiseCommon &) = delete; // copying is disabled; NOTE(review): takes a non-const reference, the conventional spelling is const DepthwiseCommon &
+ DepthwiseCommon &operator=(DepthwiseCommon &) = delete; // copy assignment is disabled; NOTE(review): same non-const reference parameter
+
+ void execute( // Overload 1: no strides supplied — derives them from m_args and forwards to overload 2.
+ const void *const input,
+ const void *const parameters,
+ void *const output,
+ void *const working_space,
+ const unsigned int thread_id,
+ const unsigned int n_threads) const override
+ {
+ const size_t ld_input_col = m_args.input_channels; // column stride = channel count, i.e. channels are packed innermost; units presumably elements — confirm
+ const size_t ld_input_row = ld_input_col * m_args.input_cols; // row stride = one full row of columns
+ const size_t ld_input_batch = ld_input_row * m_args.input_rows; // batch stride = one full image of rows
+ const size_t ld_output_col = m_args.input_channels * m_args.channel_multiplier; // NOTE(review): the product is evaluated in unsigned int and only then widened to size_t
+ const size_t ld_output_row = ld_output_col * m_args.output_cols; // row stride of the output
+ const size_t ld_output_batch = ld_output_row * m_args.output_rows; // batch stride of the output
+
+ execute( // forwards to the strided overload below with the strides just computed
+ input, ld_input_col, ld_input_row, ld_input_batch,
+ parameters, output, ld_output_col, ld_output_row, ld_output_batch,
+ working_space, thread_id, n_threads);
+ }
+
+ void execute( // Overload 2: caller supplies the strides — shapes and padding are taken from m_args and forwarded to overload 3.
+ const void *const input,
+ size_t ld_input_col,
+ size_t ld_input_row,
+ size_t ld_input_batch,
+ const void *const parameters,
+ void *const output,
+ size_t ld_output_col,
+ size_t ld_output_row,
+ size_t ld_output_batch,
+ void *const working_space,
+ const unsigned int thread_id,
+ const unsigned int n_threads) const override
+ {
+ execute( // every shape argument and the padding come from the stored m_args
+ m_args.n_batches, m_args.input_rows, m_args.input_cols,
+ m_args.input_channels, m_args.padding,
+ input, ld_input_col, ld_input_row, ld_input_batch,
+ parameters,
+ m_args.output_rows, m_args.output_cols,
+ output, ld_output_col, ld_output_row, ld_output_batch,
+ working_space, thread_id, n_threads);
+ }
+
+ virtual void execute( // Overload 3: shapes, padding and strides all explicit; still pure virtual, so derived classes must provide it.
+ unsigned int batches,
+ unsigned int input_height,
+ unsigned int input_width,
+ unsigned int channels,
+ const PaddingValues &,
+ const void *input,
+ size_t ld_input_col,
+ size_t ld_input_row,
+ size_t ld_input_batch,
+ const void *parameters,
+ unsigned int output_height,
+ unsigned int output_width,
+ void *output,
+ size_t ld_output_col,
+ size_t ld_output_row,
+ size_t ld_output_batch,
+ void *working_space,
+ unsigned int thread_id,
+ unsigned int n_threads) const override = 0;
+};
+
+template <typename TInput, typename TWeight = TInput, typename TOutput = TInput> // TWeight and TOutput default to TInput
+using UniqueDepthwiseCommon = std::unique_ptr<DepthwiseCommon<TInput, TWeight, TOutput>>; // owning pointer to a DepthwiseCommon instantiation
+
+template <typename TInput, typename TWeight = TInput, typename TOutput = TInput, class OutputStage = Nothing> // declaration only: no definition appears in this header
+KernelDescription get_depthwise_method(const DepthwiseArgs &, const OutputStage & = {}); // returns one KernelDescription; presumably the kernel depthwise() would pick for these args — confirm against the definition
+
+template <typename TInput, typename TWeight = TInput, typename TOutput = TInput, class OutputStage = Nothing> // declaration only: no definition appears in this header
+UniqueDepthwiseCommon<TInput, TWeight, TOutput> depthwise(const DepthwiseArgs &, const OutputStage & = {}); // returns an owning pointer; presumably to the implementation chosen for these args, and whether it can be null is not visible here — confirm
+
+template <typename TInput, typename TWeight = TInput, typename TOutput = TInput, class OutputStage = Nothing> // declaration only: no definition appears in this header
+std::vector<KernelDescription> get_compatible_kernels(const DepthwiseArgs &, const OutputStage & = {}); // returns a list of KernelDescription; presumably every kernel usable for these args, ordering unknown — confirm against the definition
+
+} // namespace depthwise
+} // namespace arm_conv
diff --git a/src/core/NEON/kernels/assembly/depthwise_common.hpp b/src/core/NEON/kernels/assembly/depthwise_common.hpp
new file mode 100644
index 0000000000..52963ab357
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/depthwise_common.hpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "arm_gemm.hpp"
+#include "common.hpp"
+
+namespace arm_conv
+{
+namespace depthwise
+{
+using arm_gemm::Nothing;
+
+enum class DepthwiseMethod // Identifies a depthwise implementation strategy; stored in KernelDescription::method.
+{
+ DEFAULT, // the value KernelDescription::method is initialised to
+ DEPTHFIRST, // meaning not defined in this header — presumably a depth-first traversal strategy; confirm
+ PLANAR, // meaning not defined in this header — presumably a plane-at-a-time strategy; confirm
+};
+
+struct KernelDescription // Value type describing one kernel: its method, its name, whether it is the default, and a cycle estimate.
+{
+ DepthwiseMethod method = DepthwiseMethod::DEFAULT;
+ std::string name = ""; // empty unless set by the four-argument constructor
+ bool is_default = false;
+ uint64_t cycle_estimate = 0; // how the estimate is produced or compared is not defined in this header
+
+ KernelDescription( // Member-wise constructor: each parameter initialises the member of the same name.
+ DepthwiseMethod method,
+ std::string name,
+ bool is_default,
+ uint64_t cycle_estimate)
+ : method(method), name(name), is_default(is_default), cycle_estimate(cycle_estimate) // NOTE(review): name is taken by value and then copied again; std::move(name) would avoid the second copy
+ {
+ }
+
+ KernelDescription() noexcept {}; // leaves every member at its in-class default
+};
+
+class IDepthwiseCommon // Abstract interface: every member function below is pure virtual.
+{
+public:
+ virtual ~IDepthwiseCommon() = default; // virtual so implementations can be destroyed through a pointer to this interface
+
+ // Return the amount of storage needed for the rearranged (packed) weights
+ // and bias.
+ virtual size_t get_storage_size(void) const = 0; // units are not stated here — presumably bytes; confirm
+
+ // Rearrange the weights and biases into a storage buffer.
+ // Accepts a pointer to the buffer that receives the packed parameters, a
+ // pointer to the bias vector (which may be nullptr in the case of no bias) and
+ // a pointer to the array of weights (stored in HWIO order).
+ virtual void pack_parameters(
+ void *buffer,
+ const void *biases,
+ const void *weights,
+ size_t ld_weight_col = 0, // defaults to 0; what 0 means is not stated here — presumably "derive the stride", confirm
+ size_t ld_weight_row = 0) = 0; // same default and caveat as ld_weight_col
+
+ // Return the amount of working space required for the given thread and input-channel counts.
+ virtual size_t get_working_size(unsigned int n_threads, unsigned int n_input_channels) const = 0;
+
+ // Execute the convolution over the specified area of memory (shortest form: no strides or shapes passed).
+ virtual void execute(
+ const void *input, // Pointer to input tensor
+ const void *parameters, // Packed parameters buffer
+ void *output, // Pointer to output tensor
+ void *working_space, // Scratch memory; presumably sized via get_working_size() — confirm
+ unsigned int thread_id, // presumably the index of the calling thread within n_threads — confirm
+ unsigned int n_threads) const = 0;
+
+ virtual void execute( // Same as above, with caller-supplied column/row/batch strides for input and output.
+ const void *input,
+ size_t ld_input_col,
+ size_t ld_input_row,
+ size_t ld_input_batch,
+ const void *parameters,
+ void *output,
+ size_t ld_output_col,
+ size_t ld_output_row,
+ size_t ld_output_batch,
+ void *working_space,
+ unsigned int thread_id,
+ unsigned int n_threads) const = 0;
+
+ virtual void execute( // Fullest form: batch count, input/output shapes and padding are passed explicitly along with the strides.
+ unsigned int batches,
+ unsigned int input_height,
+ unsigned int input_width,
+ unsigned int channels,
+ const PaddingValues &,
+ const void *input,
+ size_t ld_input_col,
+ size_t ld_input_row,
+ size_t ld_input_batch,
+ const void *parameters,
+ unsigned int output_height,
+ unsigned int output_width,
+ void *output,
+ size_t ld_output_col,
+ size_t ld_output_row,
+ size_t ld_output_batch,
+ void *working_space,
+ unsigned int thread_id,
+ unsigned int n_threads) const = 0;
+};
+
+} // namespace depthwise
+} // namespace arm_conv
diff --git a/src/core/NEON/kernels/assembly/pool_common.hpp b/src/core/NEON/kernels/assembly/pool_common.hpp
index fdc18aef39..b6a0a0abed 100644
--- a/src/core/NEON/kernels/assembly/pool_common.hpp
+++ b/src/core/NEON/kernels/assembly/pool_common.hpp
@@ -24,9 +24,7 @@
#pragma once
-#ifdef CYCLE_PROFILING
-#include "profiler.hpp"
-#endif // CYCLE_PROFILING
+#include "common.hpp"
namespace arm_conv
{
@@ -55,11 +53,6 @@ struct PoolingStride
unsigned int rows, cols;
};
-struct PaddingValues
-{
- unsigned int left, top, right, bottom;
-};
-
class IPoolingCommon
{
public: