aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLReductionOperationKernel.cpp
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2018-06-15 16:15:26 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commitd1794ebfa10d05af7d2458c5d506152fd38068d3 (patch)
treee3f286aaba86b1f0bcda3390ad4d8af96b965fc7 /src/core/CL/kernels/CLReductionOperationKernel.cpp
parent7777b1aa865d3c17dcef31573d44fae421176109 (diff)
downloadComputeLibrary-d1794ebfa10d05af7d2458c5d506152fd38068d3.tar.gz
COMPMID-1226 Extend CLMeanStdDev to support FP32 / FP16
- Extend support for FP16 in CLReduction. - For F16/F32 MeanStdDev we perform one reduction operation for mean and one for stddev and we calculate the final result in the host CPU. Change-Id: Iad2099f26c0ba7969737d22f00c6c275634d875c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135870 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLReductionOperationKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index d64f0d89c5..95967fa974 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -44,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_UNUSED(op);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
@@ -69,7 +69,7 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
const unsigned int num_elems_processed_per_iteration = 16;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!)
+ const unsigned int border_width = ((input->dimension(0) % num_elems_processed_per_iteration) != 0) ? num_elems_processed_per_iteration - input->dimension(0) % num_elems_processed_per_iteration : 0;
AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1);
AccessWindowHorizontal output_access(output, 0, 1);