path: root/src/runtime
author     Michalis Spyrou <michalis.spyrou@arm.com>    2020-05-21 15:02:36 +0100
committer  Georgios Pinitas <georgios.pinitas@arm.com>  2020-06-16 11:42:09 +0000
commit     bcd2352d7fd99a2f6aab220fa0c3b3f3119a1a4c (patch)
tree       a3e1880071bca828b1c58be71805ccce4b205e53 /src/runtime
parent     eae658453199d67a41deccbeb78e55b8eea9e966 (diff)
download   ComputeLibrary-bcd2352d7fd99a2f6aab220fa0c3b3f3119a1a4c.tar.gz
COMPMID-3391: Implement Async interfaces
Change-Id: I8168cea5056ff48a0253ebb8c88ea549a3ea69a2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3335
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/CPP/CPPScheduler.cpp                          78
-rw-r--r--  src/runtime/CPP/SingleThreadScheduler.cpp                  8
-rw-r--r--  src/runtime/NEON/INEOperator.cpp                          53
-rw-r--r--  src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp   20
-rw-r--r--  src/runtime/NEON/functions/NEReductionOperation.cpp        8
-rw-r--r--  src/runtime/NEON/functions/NEReshapeLayer.cpp             38
-rw-r--r--  src/runtime/NEON/functions/NESoftmaxLayer.cpp             27
-rw-r--r--  src/runtime/OMP/OMPScheduler.cpp                          35
-rw-r--r--  src/runtime/OperatorTensor.cpp                            57
9 files changed, 266 insertions, 58 deletions
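
Before the per-file diffs, a quick orientation: this patch adds a stateless "operator" path next to the existing stateful kernel path. Tensors are handed to the scheduler at run time as (TensorType, tensor) pairs instead of being captured at configure() time. A minimal caller-side sketch of the new pattern, condensed from NEReshapeLayer::run() further down (src and dst are assumed to be valid ITensor pointers):

    InputOperatorTensors  src_0 = std::make_pair(TensorType::ACL_SRC, src); // src: assumed ITensor*
    OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, dst); // dst: assumed ITensor*

    std::vector<InputOperatorTensors *>  inputs  = { &src_0 };
    std::vector<OutputOperatorTensors *> outputs = { &dst_0 };

    // New entry point: tensors travel with the call, not with the kernel.
    NEScheduler::get().schedule_op(kernel.get(), Window::DimY, inputs, outputs);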
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index 0a03497cb9..db551590ea 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -95,10 +95,10 @@ std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std:
// nt = sqrt(max_threads * (m / n) )
const unsigned adjusted = std::round(
- std::sqrt(max_threads * ratio));
+ std::sqrt(max_threads * ratio));
//find the nearest factor of max_threads
- for(unsigned i = 0; i!= adjusted; ++i)
+ for(unsigned i = 0; i != adjusted; ++i)
{
//try down
const unsigned adj_down = adjusted - i;
@@ -118,11 +118,11 @@ std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std:
//we didn't find anything so lets bail out with maxes biased to the largest dimension
if(m > n)
{
- return{ std::min<unsigned>(m, max_threads), 1 };
+ return { std::min<unsigned>(m, max_threads), 1 };
}
else
{
- return{ 1, std::min<unsigned>(n, max_threads) };
+ return { 1, std::min<unsigned>(n, max_threads) };
}
}
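
The loop above walks outward from the rounded square-root estimate until it hits a divisor of max_threads; for example, max_threads = 8 with m = 4n gives adjusted = round(sqrt(32)) = 6, and the search settles on the 4x2 factorisation. A standalone sketch of that idea, reconstructed from the visible fragment under stated assumptions (the real function may handle the factor pairing differently):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <utility>

    // Sketch of the 2D split heuristic: estimate the thread count along the
    // larger dimension, then snap to the nearest factor of max_threads.
    std::pair<unsigned, unsigned> split_2d_sketch(unsigned max_threads, std::size_t m, std::size_t n)
    {
        const double   ratio    = static_cast<double>(m) / static_cast<double>(n);
        const unsigned adjusted = static_cast<unsigned>(std::round(std::sqrt(max_threads * ratio)));

        // Walk outward from the estimate looking for a divisor of max_threads.
        for(unsigned i = 0; i != adjusted; ++i)
        {
            const unsigned adj_down = adjusted - i;
            if(max_threads % adj_down == 0)
            {
                return { adj_down, max_threads / adj_down };
            }
            const unsigned adj_up = adjusted + i;
            if(max_threads % adj_up == 0)
            {
                return { adj_up, max_threads / adj_up };
            }
        }

        // Nothing found: bias all threads to the larger dimension.
        if(m > n)
        {
            return { std::min<unsigned>(m, max_threads), 1 };
        }
        return { 1, std::min<unsigned>(n, max_threads) };
    }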
@@ -144,7 +144,6 @@ void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeede
}
while(feeder.get_next(workload_index));
}
-
} //namespace
struct CPPScheduler::Impl final
@@ -364,11 +363,11 @@ void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
}
#endif /* DOXYGEN_SKIP_THIS */
-void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
+void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
- const Window &max_window = kernel->window();
+ const Window &max_window = kernel->window();
if(hints.split_dimension() == IScheduler::split_dimensions_all)
{
@@ -379,34 +378,32 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
const std::size_t m = max_window.num_iterations(Window::DimX);
const std::size_t n = max_window.num_iterations(Window::DimY);
- //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(...
+ //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(...
unsigned m_threads, n_threads;
std::tie(m_threads, n_threads) = split_2d(_impl->_num_threads, m, n);
std::vector<IScheduler::Workload> workloads;
- for(unsigned int ni = 0; ni != n_threads; ++ni)
+ for(unsigned int ni = 0; ni != n_threads; ++ni)
{
- for(unsigned int mi = 0; mi != m_threads; ++mi)
+ for(unsigned int mi = 0; mi != m_threads; ++mi)
{
workloads.push_back(
- [ ni, mi, m_threads, n_threads, &max_window, &kernel ]
- (const ThreadInfo & info)
- {
- //narrow the window to our mi-ni workload
- Window win = max_window.split_window(Window::DimX, mi, m_threads)
- .split_window(Window::DimY, ni, n_threads);
+ [ni, mi, m_threads, n_threads, &max_window, &kernel](const ThreadInfo & info)
+ {
+ //narrow the window to our mi-ni workload
+ Window win = max_window.split_window(Window::DimX, mi, m_threads)
+ .split_window(Window::DimY, ni, n_threads);
- win.validate();
+ win.validate();
- Window thread_locator;
- thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads));
- thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads));
+ Window thread_locator;
+ thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads));
+ thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads));
- thread_locator.validate();
+ thread_locator.validate();
- kernel->run_nd(win, info, thread_locator);
- }
- );
+ kernel->run_nd(win, info, thread_locator);
+ });
}
}
run_workloads(workloads);
@@ -425,7 +422,14 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
ThreadInfo info;
info.cpu_info = &_cpu_info;
- kernel->run(max_window, info);
+ if(inputs.empty())
+ {
+ kernel->run(max_window, info);
+ }
+ else
+ {
+ kernel->run_op(inputs, outputs, max_window, info);
+ }
}
else
{
@@ -449,15 +453,35 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
for(unsigned int t = 0; t < num_windows; t++)
{
//Capture 't' by copy, all the other variables by reference:
- workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
+ workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info)
{
Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
win.validate();
- kernel->run(win, info);
+
+ if(inputs.empty())
+ {
+ kernel->run(win, info);
+ }
+ else
+ {
+ kernel->run_op(inputs, outputs, win, info);
+ }
};
}
run_workloads(workloads);
}
}
}
+
+void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs)
+{
+ schedule_common(kernel, hints, inputs, outputs);
+}
+
+void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
+{
+ std::vector<InputOperatorTensors *> inputs;
+ std::vector<OutputOperatorTensors *> outputs;
+ schedule_common(kernel, hints, inputs, outputs);
+}
} // namespace arm_compute
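
The upshot of this refactor: both public entry points funnel into schedule_common(), and the inputs.empty() checks above pick the kernel interface. A condensed sketch of the two call paths (the bare split dimension converts implicitly to Hints, as elsewhere in the library):

    // Stateful path: schedule_common() sees empty tensor vectors and
    // dispatches to kernel->run(win, info).
    CPPScheduler::get().schedule(&legacy_kernel, Window::DimY);

    // Stateless path: tensors provided, dispatch goes to
    // kernel->run_op(inputs, outputs, win, info).
    CPPScheduler::get().schedule_op(&op_kernel, Window::DimY, inputs, outputs);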
diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp
index 660a79652c..777f84bec8 100644
--- a/src/runtime/CPP/SingleThreadScheduler.cpp
+++ b/src/runtime/CPP/SingleThreadScheduler.cpp
@@ -49,6 +49,14 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
kernel->run(kernel->window(), info);
}
+void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs)
+{
+ ARM_COMPUTE_UNUSED(hints);
+ ThreadInfo info;
+ info.cpu_info = &_cpu_info;
+ kernel->run_op(inputs, outputs, kernel->window(), info);
+}
+
void SingleThreadScheduler::run_workloads(std::vector<Workload> &workloads)
{
ThreadInfo info;
diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp
new file mode 100644
index 0000000000..c24d5c47f1
--- /dev/null
+++ b/src/runtime/NEON/INEOperator.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/INEOperator.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+INEOperator::INEOperator(IRuntimeContext *ctx)
+ : _kernel(), _ctx(ctx), _workspace()
+{
+}
+
+void INEOperator::run(std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, std::vector<OperatorTensors *> &workspace)
+{
+ ARM_COMPUTE_UNUSED(workspace);
+
+ if(inputs.empty() || outputs.empty())
+ {
+ ARM_COMPUTE_ERROR("No inputs provided");
+ }
+
+ NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs);
+}
+
+void INEOperator::prepare(std::vector<OperatorTensors *> constants)
+{
+ ARM_COMPUTE_UNUSED(constants);
+}
+} // namespace experimental
+} // namespace arm_compute
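
A hypothetical operator built on this base class only needs to configure a kernel against ITensorInfo; run() and prepare() are inherited from INEOperator. A sketch of the pattern, mirroring the NEReshapeLayer changes below (NEMyOperator and NEMyKernel are made-up stand-ins):

    // Hypothetical minimal INEOperator subclass (illustration only).
    class NEMyOperator : public experimental::INEOperator
    {
    public:
        void configure(const ITensorInfo *input, ITensorInfo *output)
        {
            auto k = arm_compute::support::cpp14::make_unique<NEMyKernel>(); // NEMyKernel: hypothetical
            k->configure(input, output);
            // _kernel is the inherited member; the base run() schedules it
            // via NEScheduler::get().schedule_op(...).
            _kernel = std::move(k);
        }
    };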
diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
index 82880bac85..dabbebacb4 100644
--- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
+++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp
@@ -31,9 +31,9 @@ namespace arm_compute
NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager),
_permute_deltas_kernel(),
- _flatten_deltas_kernel(),
+ _flatten_deltas(),
_permute_scores_kernel(),
- _flatten_scores_kernel(),
+ _flatten_scores(),
_compute_anchors_kernel(),
_bounding_box_kernel(),
_pad_kernel(),
@@ -95,12 +95,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
{
_memory_group.manage(&_deltas_permuted);
_permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
- _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
+ _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened);
_deltas_permuted.allocator()->allocate();
}
else
{
- _flatten_deltas_kernel.configure(deltas, &_deltas_flattened);
+ _flatten_deltas.configure(deltas, &_deltas_flattened);
}
const TensorShape flatten_shape_scores(1, total_num_anchors);
@@ -112,12 +112,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d
{
_memory_group.manage(&_scores_permuted);
_permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
- _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
+ _flatten_scores.configure(&_scores_permuted, &_scores_flattened);
_scores_permuted.allocator()->allocate();
}
else
{
- _flatten_scores_kernel.configure(scores, &_scores_flattened);
+ _flatten_scores.configure(scores, &_scores_flattened);
}
Tensor *anchors_to_use = &_all_anchors;
@@ -244,12 +244,12 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
}
TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
- ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&deltas_permuted_info, &deltas_flattened_info));
TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true));
TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
- ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&scores_permuted_info, &scores_flattened_info));
TensorInfo *proposals_4_roi_values_to_use = &proposals_4_roi_values;
TensorInfo proposals_4_roi_values_quantized(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
@@ -327,8 +327,8 @@ void NEGenerateProposalsLayer::run()
NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY);
}
- NEScheduler::get().schedule(&_flatten_deltas_kernel, Window::DimY);
- NEScheduler::get().schedule(&_flatten_scores_kernel, Window::DimY);
+ _flatten_deltas.run();
+ _flatten_scores.run();
if(_is_qasymm8)
{
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 80ebe6731a..a895147cc9 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -54,7 +54,7 @@ size_t reduction_window_split_dimension(unsigned int axis)
} // namespace
NEReductionOperation::NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape_kernel(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false)
+ : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false)
{
}
@@ -91,7 +91,7 @@ Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInf
if(is_reshape_required)
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(output_internal, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(output_internal, output));
}
return Status{};
@@ -171,7 +171,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i
if(_is_reshape_required)
{
- _reshape_kernel.configure(output_internal, output);
+ _reshape.configure(output_internal, output);
_output_internal.allocator()->allocate();
}
}
@@ -185,7 +185,7 @@ void NEReductionOperation::run()
NEScheduler::get().schedule(&_reduction_kernel, _window_split);
if(_is_reshape_required)
{
- NEScheduler::get().schedule(&_reshape_kernel, Window::DimY);
+ _reshape.run();
}
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp
index 0a9f42d510..680abef026 100644
--- a/src/runtime/NEON/functions/NEReshapeLayer.cpp
+++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp
@@ -25,13 +25,17 @@
#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/Types.h"
#include "support/MemorySupport.h"
#include <utility>
namespace arm_compute
{
-void NEReshapeLayer::configure(const ITensor *input, ITensor *output)
+namespace experimental
+{
+void NEReshapeLayer::configure(const ITensorInfo *input, ITensorInfo *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
k->configure(input, output);
@@ -40,9 +44,41 @@ void NEReshapeLayer::configure(const ITensor *input, ITensor *output)
Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
+ return arm_compute::NEReshapeLayer::validate(input, output);
+}
+
+MemoryRequirements NEReshapeLayer::workspace() const
+{
+ return MemoryRequirements{};
+}
+} // namespace experimental
+
+void NEReshapeLayer::configure(const ITensor *input, ITensor *output)
+{
+ _input = input;
+ _output = output;
+
+ auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
+ k->configure(input->info(), output->info());
+ _kernel = std::move(k);
+}
+
+Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, output));
return Status{};
}
+
+void NEReshapeLayer::run()
+{
+ InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, _input);
+ OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, _output);
+
+ std::vector<InputOperatorTensors *> inputs = { &src_0 };
+ std::vector<OutputOperatorTensors *> outputs = { &dst_0 };
+
+ NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs);
+}
} // namespace arm_compute
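
End to end, the experimental reshape is configured once against tensor metadata and then fed tensors per call through the inherited INEOperator::run(). A hedged usage sketch, assuming src_info/dst_info (ITensorInfo*) and src/dst (ITensor*) are valid:

    experimental::NEReshapeLayer reshape;
    reshape.configure(src_info, dst_info); // shapes fixed up front, no tensors bound

    InputOperatorTensors  in  = std::make_pair(TensorType::ACL_SRC, src);
    OutputOperatorTensors out = std::make_pair(TensorType::ACL_DST, dst);
    std::vector<InputOperatorTensors *>  inputs    = { &in };
    std::vector<OutputOperatorTensors *> outputs   = { &out };
    std::vector<OperatorTensors *>       workspace = {}; // reshape declares no workspace

    reshape.run(inputs, outputs, workspace); // forwards to NEScheduler::schedule_op()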
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index 5509edec87..5cd6a550af 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -32,8 +32,8 @@ namespace arm_compute
{
template <bool IS_LOG>
NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_kernel_ptr(nullptr), _fill_border_kernel(), _reshape_kernel(), _max(), _tmp(), _input_flattened(),
- _output_flattened(), _needs_flattening(false)
+ : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_ptr(nullptr), _fill_border_kernel(), _reshape(), _max(), _tmp(), _input_flattened(), _output_flattened(),
+ _needs_flattening(false)
{
}
@@ -46,23 +46,20 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const ITensor
// Initialize the flat input
_input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
- // If we need to flatten the input, we can use NEFlattenKernel or NEReshapeKernel
- // If the number of reduced axes is 3 (max dimension), which means collapsing all axes except the batch axis, we use NEFlattenKernel.
- // In all other cases we have to use NEReshapeKernel
// Note that the "other cases" include both:
// 1. first_n_reduce_axes < 3: Reduce the first 1 (no need to reduce) or 2 dimensions (inclusive)
// 2. first_n_reduce_axes == 4: Reduce all 4 dimensions. This can only be handled by NEReshapeKernel instead of NEFlattenKernel.
if(first_n_reduce_axes == 3)
{
- auto flatten_kernel_ptr = support::cpp14::make_unique<NEFlattenLayerKernel>();
+ auto flatten_kernel_ptr = support::cpp14::make_unique<NEFlattenLayer>();
flatten_kernel_ptr->configure(input, &_input_flattened);
- _flat_or_reshape_kernel_ptr = std::move(flatten_kernel_ptr);
+ _flat_or_reshape_ptr = std::move(flatten_kernel_ptr);
}
else
{
- auto reshape_kernel_ptr = support::cpp14::make_unique<NEReshapeLayerKernel>();
+ auto reshape_kernel_ptr = support::cpp14::make_unique<NEReshapeLayer>();
reshape_kernel_ptr->configure(input, &_input_flattened);
- _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr);
+ _flat_or_reshape_ptr = std::move(reshape_kernel_ptr);
}
// We need to init the output tensor here. Indeed, the reshape kernel expects
@@ -127,7 +124,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
_input_flattened.allocator()->allocate();
// Reshape the flat output into the requested (4D) output
- _reshape_kernel.configure(&_output_flattened, output);
+ _reshape.configure(&_output_flattened, output);
// Allocate the intermediate flat tensors
_output_flattened.allocator()->allocate();
@@ -174,11 +171,11 @@ Status NESoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const I
if(first_n_reduce_axes == 3)
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &tensor_info_flat));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &tensor_info_flat));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, &tensor_info_flat));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(input, &tensor_info_flat));
}
}
@@ -195,7 +192,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::run()
if(_needs_flattening)
{
- NEScheduler::get().schedule(_flat_or_reshape_kernel_ptr.get(), Window::DimY);
+ _flat_or_reshape_ptr->run();
}
NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
@@ -204,11 +201,11 @@ void NESoftmaxLayerGeneric<IS_LOG>::run()
if(_needs_flattening)
{
- NEScheduler::get().schedule(&_reshape_kernel, Window::DimY);
+ _reshape.run();
}
}
template class NESoftmaxLayerGeneric<false>;
template class NESoftmaxLayerGeneric<true>;
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index f67f06fc94..a1851f03c3 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,6 +83,39 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
}
}
+void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
+ ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC,
+ "Dynamic scheduling is not supported in OMPScheduler");
+
+ const Window &max_window = kernel->window();
+ const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
+ const unsigned int num_threads = std::min(num_iterations, _num_threads);
+
+ if(!kernel->is_parallelisable() || num_threads == 1)
+ {
+ ThreadInfo info;
+ info.cpu_info = &_cpu_info;
+ kernel->run_op(inputs, outputs, max_window, info);
+ }
+ else
+ {
+ const unsigned int num_windows = num_threads;
+ std::vector<IScheduler::Workload> workloads(num_windows);
+ for(unsigned int t = 0; t < num_windows; t++)
+ {
+ //Capture 't' by copy, all the other variables by reference:
+ workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info)
+ {
+ Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
+ win.validate();
+ kernel->run_op(inputs, outputs, win, info);
+ };
+ }
+ run_workloads(workloads);
+ }
+}
#ifndef DOXYGEN_SKIP_THIS
void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads)
{
diff --git a/src/runtime/OperatorTensor.cpp b/src/runtime/OperatorTensor.cpp
new file mode 100644
index 0000000000..5d4e126177
--- /dev/null
+++ b/src/runtime/OperatorTensor.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/OperatorTensor.h"
+#include "arm_compute/runtime/MemoryRegion.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+OperatorTensor::OperatorTensor(ITensorInfo *info, IMemory *memory)
+ : _info(info), _memory(memory), _mem_type(MemoryType::CPU)
+{
+}
+
+ITensorInfo *OperatorTensor::info() const
+{
+ return _info;
+}
+
+ITensorInfo *OperatorTensor::info()
+{
+ return _info;
+}
+
+uint8_t *OperatorTensor::buffer() const
+{
+ switch(_mem_type)
+ {
+ case MemoryType::CPU:
+ return (uint8_t *)dynamic_cast<MemoryRegion *>(_memory->region())->buffer();
+ default:
+ ARM_COMPUTE_ERROR("Memory type not supported.");
+ }
+}
+} // namespace experimental
+} // namespace arm_compute
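
OperatorTensor is a non-owning view pairing externally-owned metadata with externally-owned memory; buffer() resolves the CPU region. A construction sketch under assumptions (TensorInfo, MemoryRegion and Memory are existing runtime types; the shape and data type are illustrative):

    TensorInfo info(TensorShape(16U, 4U), 1, DataType::F32);         // illustrative shape/type
    auto       region = std::make_shared<MemoryRegion>(info.total_size());
    Memory     memory(region);                                       // wraps the region as IMemory

    experimental::OperatorTensor tensor(&info, &memory);
    uint8_t *ptr = tensor.buffer(); // CPU path: casts the region to MemoryRegion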