about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- arm_compute/core/CL/kernels/CLScaleKernel.h 13
-rw-r--r-- src/core/CL/kernels/CLScaleKernel.cpp 15
-rw-r--r-- src/runtime/CL/functions/CLScale.cpp 5
-rw-r--r-- src/runtime/CL/tuners/BifrostTuner.cpp 32
4 files changed, 63 insertions, 2 deletions
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h
index b089e8f61a..db87519c0f 100644
--- a/arm_compute/core/CL/kernels/CLScaleKernel.h
+++ b/arm_compute/core/CL/kernels/CLScaleKernel.h
@@ -58,10 +58,23 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
+ /** Input tensor accessor.
+ *
+ * @return Pointer to the input tensor held by the kernel (the _input member assigned in configure()).
+ */
+ const ICLTensor *input() const;
+ /** Output tensor accessor.
+ *
+ * @return Pointer to the output tensor held by the kernel (the _output member assigned in configure()).
+ */
+ const ICLTensor *output() const;
// Inherited methods overridden:
BorderSize border_size() const override;
void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ InterpolationPolicy _interpolationPolicy = InterpolationPolicy::BILINEAR; /**< Interpolation policy; defaults to BILINEAR and is overwritten with the policy passed to configure(). Read directly by tune_scale_kernel() in BifrostTuner.cpp. NOTE(review): public data member - consider a const accessor instead. */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLSCALEKERNEL_H__ */
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index b2cd4b7adf..d56d6f7da8 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -148,10 +148,21 @@ Status CLScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *outp
return Status{};
}
+const ICLTensor *CLScaleKernel::input() const // Read-only accessor: returns the stored _input pointer as-is.
+{
+ return _input;
+}
+
+const ICLTensor *CLScaleKernel::output() const // Read-only accessor: returns the stored _output pointer as-is.
+{
+ return _output;
+}
+
void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
{
- _input = input;
- _output = output;
+ _input = input;
+ _output = output;
+ _interpolationPolicy = policy;
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), policy));
diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp
index 7ef55f9f08..4ff9763397 100644
--- a/src/runtime/CL/functions/CLScale.cpp
+++ b/src/runtime/CL/functions/CLScale.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
@@ -34,9 +35,13 @@ using namespace arm_compute;
void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
{
auto k = arm_compute::support::cpp14::make_unique<CLScaleKernel>();
+ k->set_target(CLScheduler::get().target());
k->configure(input, output, policy, border_mode, sampling_policy);
_kernel = std::move(k);
+ // Tune kernels
+ CLScheduler::get().tune_kernel_static(*_kernel);
+
// In the case of NHWC we can't have undefined border mode as this would require to access elements outside z dimension,
// so we treat it like border constant.
if(border_mode == BorderMode::UNDEFINED && input->info()->data_layout() == DataLayout::NHWC)
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index fa67710cc8..2d52f3392e 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -249,6 +249,34 @@ void tune_pooling_kernel(CLPoolingLayerKernel &k)
k.set_lws_hint(lws_hint);
}
+
+void tune_scale_kernel(CLScaleKernel &k) // Static local-work-size (LWS) hint selection for a CLScaleKernel.
+{
+ cl::NDRange lws_hint = k.lws_hint(); // Start from the kernel's current hint; it is re-applied unchanged when no size below matches.
+ const GPUTarget gpu_target = k.get_target();
+ const DataType dt = k.input()->info()->data_type(); // Dereferences k.input(), so the kernel must already hold an input tensor.
+ const InterpolationPolicy interpolation = k._interpolationPolicy; // Public member of CLScaleKernel, assigned in CLScaleKernel::configure().
+
+ // Configure the local work size for Bifrost (G71/G72 targets only), bilinear interpolation and data type F32.
+ // Any other combination returns without calling set_lws_hint(). The values were obtained via exhaustive autotuning.
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && (dt == DataType::F32) && (interpolation == InterpolationPolicy::BILINEAR))
+ {
+ auto dim_0 = k.output()->info()->dimension(0); // NOTE(review): dimension(0) is read regardless of data layout - confirm this is the intended axis for NHWC.
+ if(dim_0 == 480)
+ {
+ lws_hint = cl::NDRange(2, 1);
+ }
+ else if(dim_0 == 3120)
+ {
+ lws_hint = cl::NDRange(2, 8);
+ }
+ else if(dim_0 == 4160)
+ {
+ lws_hint = cl::NDRange(4, 8);
+ }
+ k.set_lws_hint(lws_hint); // Also reached when dim_0 matched none of the sizes above; lws_hint then still holds the value read at the top.
+ }
+}
} // namespace
void BifrostTuner::tune_kernel_static(ICLKernel &kernel)
@@ -281,6 +309,10 @@ void BifrostTuner::tune_kernel_static(ICLKernel &kernel)
{
tune_pooling_kernel(*utils::cast::polymorphic_downcast<CLPoolingLayerKernel *>(&kernel));
}
+ else if(dynamic_cast<CLScaleKernel *>(&kernel) != nullptr)
+ {
+ tune_scale_kernel(*utils::cast::polymorphic_downcast<CLScaleKernel *>(&kernel));
+ }
}
void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel)