 arm_compute/core/PixelValue.h                            |  50
 arm_compute/core/Types.h                                 |   9
 arm_compute/runtime/CL/functions/CLReductionOperation.h  |   8
 src/core/CL/cl_kernels/reduction_operation.cl            |  68
 src/core/CL/kernels/CLFillBorderKernel.cpp               |  28
 src/core/CL/kernels/CLReductionOperationKernel.cpp       |  72
 src/core/NEON/kernels/NEFillBorderKernel.cpp             |   2
 src/runtime/CL/functions/CLReductionOperation.cpp        | 100
 src/runtime/NEON/functions/NEIntegralImage.cpp           |   4
 src/runtime/NEON/functions/NEScale.cpp                   |   4
 tests/datasets/ReductionOperationDataset.h               |  53
 tests/validation/CL/ReductionOperation.cpp               |  20
 tests/validation/NEON/ReductionOperation.cpp             |  12
 tests/validation/reference/ReductionOperation.cpp        |  11
 utils/TypePrinter.h                                      |   5
 15 files changed, 256 insertions(+), 190 deletions(-)
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index 530cc166a2..e86eeba121 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,54 @@ public:
: value{ uint64_t(0) }
{
}
+ /** Initialize the union with a pixel value of the chosen data type
+ *
+ * @param[in] v        Integer value to be stored
+ * @param[in] datatype DataType in which @p v has to be stored
+ */
+ PixelValue(uint64_t v, DataType datatype)
+ : PixelValue()
+ {
+ switch(datatype)
+ {
+ case DataType::U8:
+ value.u8 = static_cast<uint8_t>(v);
+ break;
+ case DataType::S8:
+ value.s8 = static_cast<int8_t>(v);
+ break;
+ case DataType::U16:
+ value.u16 = static_cast<uint16_t>(v);
+ break;
+ case DataType::S16:
+ value.s16 = static_cast<int16_t>(v);
+ break;
+ case DataType::U32:
+ value.u32 = static_cast<uint32_t>(v);
+ break;
+ case DataType::S32:
+ value.s32 = static_cast<int32_t>(v);
+ break;
+ case DataType::U64:
+ value.u64 = static_cast<uint64_t>(v);
+ break;
+ case DataType::S64:
+ value.s64 = static_cast<int64_t>(v);
+ break;
+ case DataType::F16:
+ value.f16 = static_cast<half>(v);
+ break;
+ case DataType::F32:
+ value.f32 = static_cast<float>(v);
+ break;
+ case DataType::F64:
+ value.f64 = static_cast<double>(v);
+ break;
+ default:
+ value.u64 = v;
+ break;
+ }
+ }
/** Initialize the union with a U8 pixel value
*
* @param[in] v U8 value.
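For illustration, a minimal usage sketch of the new data-type-aware constructor (not part of the patch; the PixelValue::get overload used here is assumed from the existing class):

    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/core/Types.h"

    int main()
    {
        using namespace arm_compute;

        // Store the multiplicative identity as an F32 pixel; which union
        // member gets written depends on the DataType switch above.
        PixelValue one(1, DataType::F32);

        float v = 0.f;
        one.get(v); // v is now 1.0f
        return v == 1.f ? 0 : 1;
    }

This is exactly the shape of call the runtime makes below when it needs PixelValue(1, input->info()->data_type()) as the border fill for PROD.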
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index dc87617f55..317c8990fa 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -538,11 +538,12 @@ enum class NonLinearFilterFunction : unsigned
/** Available reduction operations */
enum class ReductionOperation
{
- SUM_SQUARE, /**< Sum of squares */
- SUM, /**< Sum */
- MEAN_SUM, /**< Mean of sum */
ARG_IDX_MAX, /**< Index of the max value */
- ARG_IDX_MIN /**< Index of the min value */
+ ARG_IDX_MIN, /**< Index of the min value */
+ MEAN_SUM, /**< Mean of sum */
+ PROD, /**< Product */
+ SUM_SQUARE, /**< Sum of squares */
+ SUM /**< Sum */
};
/** Available element-wise operations */
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index ac35b7bfc6..977562d993 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -76,12 +76,12 @@ public:
private:
CLMemoryGroup _memory_group;
- std::unique_ptr<CLTensor[]> _sums_vector{ nullptr };
+ std::unique_ptr<CLTensor[]> _results_vector{ nullptr };
std::unique_ptr<CLReductionOperationKernel[]> _reduction_kernels_vector{ nullptr };
std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector{ nullptr };
unsigned int _num_of_stages;
unsigned int _reduction_axis;
- bool _is_quantized;
+ bool _is_serial;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCTIONOPERATION_H__ */
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index d1f47beda7..b4ede25296 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,12 +60,31 @@ inline DATA_TYPE sum(__global const DATA_TYPE *input)
return (in.s0 + in.s1);
}
+
+/** Calculate product of a vector
+ *
+ * @param[in] input Pointer to the first pixel.
+ *
+ * @return product of vector.
+ */
+inline DATA_TYPE product(__global const DATA_TYPE *input)
+{
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = vload16(0, input);
+
+ in.s01234567 *= in.s89ABCDEF;
+ in.s0123 *= in.s4567;
+ in.s01 *= in.s23;
+
+ return (in.s0 * in.s1);
+}
#if defined(OPERATION)
/** This kernel performs parallel reduction given an operation on x-axis.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
+ * @note The product flag must be passed at compile time using -DPROD if we want to compute the product; otherwise the sum is used
* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
@@ -74,28 +93,28 @@ inline DATA_TYPE sum(__global const DATA_TYPE *input)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] partial_sum_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
- * @param[in] partial_sum_stride_x Stride of the output tensor in X dimension (in bytes)
- * @param[in] partial_sum_step_x partial_sum_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] partial_sum_stride_y Stride of the output tensor in Y dimension (in bytes)
- * @param[in] partial_sum_step_y partial_sum_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] partial_sum_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] local_sums Local buffer for storing the partial sum
+ * @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
+ * @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
+ * @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
+ * @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[in] local_results Local buffer for storing the partial result
*/
__kernel void reduction_operation_x(
IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(partial_sum),
- __local DATA_TYPE *local_sums)
+ IMAGE_DECLARATION(partial_res),
+ __local DATA_TYPE *local_results)
{
Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image partial_sum = CONVERT_TO_IMAGE_STRUCT(partial_sum);
+ Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
unsigned int lsize = get_local_size(0);
unsigned int lid = get_local_id(0);
for(unsigned int y = 0; y < get_local_size(1); ++y)
{
- local_sums[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
+ local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
barrier(CLK_LOCAL_MEM_FENCE);
// Perform parallel reduction
@@ -103,7 +122,11 @@ __kernel void reduction_operation_x(
{
if(lid < i)
{
- local_sums[lid] += local_sums[lid + i];
+#if defined(PROD)
+ local_results[lid] *= local_results[lid + i];
+#else //!defined(PROD)
+ local_results[lid] += local_results[lid + i];
+#endif //defined(PROD)
}
barrier(CLK_LOCAL_MEM_FENCE);
}
@@ -113,10 +136,10 @@ __kernel void reduction_operation_x(
#if defined(MEAN) && defined(WIDTH)
if(y == get_local_size(1) - 1)
{
- local_sums[0] /= WIDTH;
+ local_results[0] /= WIDTH;
}
#endif /* defined(MEAN) && defined(WIDTH) */
- ((__global DATA_TYPE *)offset(&partial_sum, get_group_id(0), y))[0] = local_sums[0];
+ ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
}
}
}
@@ -127,6 +150,7 @@ __kernel void reduction_operation_x(
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
+ * @note The product flag must be passed at compile time using -DPROD if we want to compute the product; otherwise the sum is used
* @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 and QASYMM8 for operation MEAN
@@ -230,7 +254,11 @@ __kernel void reduction_operation_y(
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
+#if defined(PROD)
+ res *= in;
+#else //!defined(PROD)
res += in;
+#endif //defined(PROD)
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
@@ -304,7 +332,11 @@ __kernel void reduction_operation_z(
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
+#if defined(PROD)
+ res *= in;
+#else //!defined(PROD)
res += in;
+#endif //defined(PROD)
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
@@ -383,7 +415,11 @@ __kernel void reduction_operation_w(
#if defined(SUM_SQUARE)
in *= in;
#endif // defined(SUM_SQUARE)
+#if defined(PROD)
+ res *= in;
+#else //!defined(PROD)
res += in;
+#endif //defined(PROD)
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
@@ -397,4 +433,4 @@ __kernel void reduction_operation_w(
vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
-#endif /* defined(BATCH) && defined(DEPTH) */
\ No newline at end of file
+#endif /* defined(BATCH) && defined(DEPTH) */
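As an aside, the pairwise fold that the new product() helper performs on a 16-wide OpenCL vector can be sketched in plain C++ (a stand-in for the vector ops, not part of the patch): halve the active width each step, multiplying the two halves together.

    #include <array>
    #include <cassert>
    #include <cstddef>

    static float product16(const std::array<float, 16> &v)
    {
        std::array<float, 16> in = v;
        for(std::size_t width = 8; width >= 1; width /= 2)
        {
            for(std::size_t i = 0; i < width; ++i)
            {
                in[i] *= in[i + width]; // mirrors in.s01234567 *= in.s89ABCDEF, etc.
            }
        }
        return in[0];
    }

    int main()
    {
        std::array<float, 16> v;
        v.fill(2.f);
        assert(product16(v) == 65536.f); // 2^16
    }

The in-kernel tree reduction over local_results follows the same halving pattern, with the -DPROD branch swapping += for *=.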
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 69206678d0..5fdb826f8b 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -75,25 +75,18 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
// Select appropriate kernel
std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode));
- // Define select type required by replicate border > 1
- const DataType dt = tensor->info()->data_type();
- std::string select_type = get_underlying_cl_type_from_data_type(dt);
- if(is_data_type_float(dt))
- {
- select_type = (DataType::F32 == dt) ? "int" : "short";
- }
+ const DataType dt = tensor->info()->data_type();
// Define build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(dt)));
- build_opts.emplace(("-DSELECT_TYPE=" + select_type));
- build_opts.emplace(("-DBORDER_SIZE_TOP=" + support::cpp11::to_string(border_size.top)));
- build_opts.emplace(("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom)));
- build_opts.emplace(("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left)));
- build_opts.emplace(("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right)));
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(dt));
+ build_opts.add_option("-DBORDER_SIZE_TOP=" + support::cpp11::to_string(border_size.top));
+ build_opts.add_option("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom));
+ build_opts.add_option("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left));
+ build_opts.add_option("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
_tensor = tensor;
// Create static kernel arguments
@@ -141,8 +134,9 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
set_constant_border<float>(idx, constant_border_value);
break;
case DataType::F16:
+ static_assert(sizeof(cl_half) == sizeof(half), "Half must be same size as cl_half");
static_assert(sizeof(cl_half) == 2, "Half must be 16 bit");
- set_constant_border<cl_half>(idx, constant_border_value);
+ set_constant_border<half>(idx, constant_border_value);
break;
default:
ARM_COMPUTE_ERROR("Not handled");
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 959209edc0..45aa810517 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -80,13 +80,13 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
const unsigned int num_elems_processed_per_iteration = (is_data_type_quantized(input->data_type()) && (axis == 0)) ? 1 : 16;
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
bool window_changed = false;
- const bool is_arg_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN);
+ const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(input->data_type()));
switch(axis)
{
case 0:
{
- if(is_data_type_quantized(input->data_type()) || is_arg_op)
+ if(is_serial_op)
{
AccessWindowHorizontal input_access(input, 0, input->dimension(0));
AccessWindowHorizontal output_access(output, 0, 1);
@@ -153,10 +153,11 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
}
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DDATA_TYPE_PROMOTED=" + data_type_promoted);
- build_opts.add_option_if(op == ReductionOperation::SUM_SQUARE, "-DSUM_SQUARE=");
+ build_opts.add_option_if(op == ReductionOperation::SUM_SQUARE, "-DSUM_SQUARE");
build_opts.add_option_if(op == ReductionOperation::MEAN_SUM, "-DMEAN");
build_opts.add_option_if(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX");
build_opts.add_option_if(op == ReductionOperation::ARG_IDX_MIN, "-DARG_MIN");
+ build_opts.add_option_if(op == ReductionOperation::PROD, "-DPROD");
switch(op)
{
@@ -170,6 +171,9 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
case ReductionOperation::ARG_IDX_MAX:
case ReductionOperation::ARG_IDX_MIN:
break;
+ case ReductionOperation::PROD:
+ build_opts.add_option(("-DOPERATION=product"));
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported reduction operation");
}
@@ -177,12 +181,18 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
// Create kernel
cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange();
std::string kernel_axis_name;
- const bool is_arg_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN);
+ const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(input->info()->data_type()));
switch(axis)
{
case 0:
{
- if(!is_data_type_quantized(input->info()->data_type()) && !is_arg_op)
+ if(is_serial_op)
+ {
+ build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
+ build_opts.add_option_if_else(_input->info()->data_type() == DataType::F32, "-DCOND_DATA_TYPE=int", "-DCOND_DATA_TYPE=short");
+ kernel_axis_name = "non_parallel_x";
+ }
+ else
{
build_opts.add_option_if(op == ReductionOperation::MEAN_SUM, "-DWIDTH=" + support::cpp11::to_string(width));
const unsigned int width_leftover = input->info()->dimension(0) % border_val;
@@ -195,12 +205,6 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
lws_hint = cl::NDRange(std::min(8U, num_of_threads));
_border_size = BorderSize(0, border_width, 0, 0);
}
- else
- {
- build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.add_option_if_else(_input->info()->data_type() == DataType::F32, "-DCOND_DATA_TYPE=int", "-DCOND_DATA_TYPE=short");
- kernel_axis_name = "non_parallel_x";
- }
}
break;
case 1:
@@ -242,13 +246,31 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- const bool is_arg_op = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN);
+ const bool is_serial_op = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN || is_data_type_quantized(_input->info()->data_type()));
switch(_reduction_axis)
{
case 0:
{
// We use parallel reduction only in non quantized types
- if(!is_data_type_quantized(_input->info()->data_type()) && !is_arg_op)
+ if(is_serial_op)
+ {
+ // Get first input and output slices
+ Window window_in{ window };
+ window_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
+
+ Window in_slice = window.first_slice_window_1D();
+ Window out_slice = window.first_slice_window_1D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_1D_tensor_argument(idx, _input, in_slice);
+ add_1D_tensor_argument(idx, _output, out_slice);
+ enqueue(queue, *this, in_slice);
+ }
+ while(window_in.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice));
+ }
+ else
{
// Set out window
Window out_window(window);
@@ -263,8 +285,8 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
// Set local sums buffer
- unsigned int local_sum_size = lws_hint()[0] * _input->info()->element_size();
- _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr);
+ unsigned int local_res_size = lws_hint()[0] * _input->info()->element_size();
+ _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_res_size, nullptr);
do
{
@@ -275,24 +297,6 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
}
while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}
- else
- {
- // Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
-
- Window in_slice = window.first_slice_window_1D();
- Window out_slice = window.first_slice_window_1D();
-
- do
- {
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input, in_slice);
- add_1D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
- }
- while(window_in.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice));
- }
}
break;
case 1:
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index 39bcd996f9..f4046e0851 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index c5447ffd6b..e2dec6b375 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,15 +56,19 @@ unsigned int calculate_number_of_stages(const ITensorInfo *input, unsigned int a
} // namespace
CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _sums_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _num_of_stages(), _reduction_axis(), _is_quantized()
+ : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _num_of_stages(), _reduction_axis(), _is_serial()
{
}
Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
{
const unsigned int num_of_stages = calculate_number_of_stages(input, axis);
-
- if(axis == 0 && !is_data_type_quantized(input->data_type()))
+ bool is_serial = is_data_type_quantized(input->data_type()) || axis != 0;
+ if(is_serial)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, output, axis, op));
+ }
+ else
{
// Create temporary tensor infos
auto sums_vector = arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_stages - 1);
@@ -81,17 +85,25 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf
}
ReductionOperation first_kernel_op;
+ ReductionOperation intermediate_kernel_op;
ReductionOperation last_kernel_op;
switch(op)
{
case ReductionOperation::SUM:
case ReductionOperation::MEAN_SUM:
- first_kernel_op = ReductionOperation::SUM;
- last_kernel_op = op;
+ first_kernel_op = ReductionOperation::SUM;
+ intermediate_kernel_op = ReductionOperation::SUM;
+ last_kernel_op = op;
break;
case ReductionOperation::SUM_SQUARE:
- first_kernel_op = ReductionOperation::SUM_SQUARE;
- last_kernel_op = ReductionOperation::SUM;
+ first_kernel_op = ReductionOperation::SUM_SQUARE;
+ intermediate_kernel_op = ReductionOperation::SUM;
+ last_kernel_op = ReductionOperation::SUM;
+ break;
+ case ReductionOperation::PROD:
+ first_kernel_op = ReductionOperation::PROD;
+ intermediate_kernel_op = ReductionOperation::PROD;
+ last_kernel_op = ReductionOperation::PROD;
break;
default:
ARM_COMPUTE_ERROR("Not supported");
@@ -103,17 +115,13 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf
// Validate ReductionOperation on intermediate stages
for(unsigned int i = 1; i < num_of_stages - 1; ++i)
{
- ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(sums_vector.get() + i - 1, sums_vector.get() + i, axis, ReductionOperation::SUM));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(sums_vector.get() + i - 1, sums_vector.get() + i, axis, intermediate_kernel_op));
}
// Validate ReductionOperation on the last stage
const unsigned int last_stage = num_of_stages - 1;
ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(sums_vector.get() + last_stage - 1, output, axis, last_kernel_op, input->dimension(0)));
}
- else
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, output, axis, op));
- }
return Status{};
}
@@ -122,65 +130,77 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
{
_num_of_stages = calculate_number_of_stages(input->info(), axis);
_reduction_axis = axis;
- _is_quantized = is_data_type_quantized(input->info()->data_type());
+ _is_serial = is_data_type_quantized(input->info()->data_type()) || axis != 0;
// Configure reduction operation kernels
_reduction_kernels_vector = arm_compute::support::cpp14::make_unique<CLReductionOperationKernel[]>(_num_of_stages);
// Create temporary tensors
- if(axis == 0 && !_is_quantized)
+ if(_is_serial)
+ {
+ _reduction_kernels_vector[0].configure(input, output, axis, op, 0);
+ }
+ else
{
_border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_of_stages);
- _sums_vector = arm_compute::support::cpp14::make_unique<CLTensor[]>(_num_of_stages - 1);
+ _results_vector = arm_compute::support::cpp14::make_unique<CLTensor[]>(_num_of_stages - 1);
TensorShape shape{ input->info()->tensor_shape() };
for(unsigned int i = 0; i < _num_of_stages - 1; i++)
{
shape.set(0, ceil(shape.x() / 128.f));
- _sums_vector[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape));
+ _results_vector[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape));
}
// Apply ReductionOperation only on first kernel
- _memory_group.manage(_sums_vector.get());
+ _memory_group.manage(_results_vector.get());
ReductionOperation first_kernel_op;
+ ReductionOperation intermediate_kernel_op;
ReductionOperation last_kernel_op;
+ PixelValue pixelValue;
switch(op)
{
case ReductionOperation::SUM:
case ReductionOperation::MEAN_SUM:
- first_kernel_op = ReductionOperation::SUM;
- last_kernel_op = op;
+ first_kernel_op = ReductionOperation::SUM;
+ intermediate_kernel_op = ReductionOperation::SUM;
+ last_kernel_op = op;
+ pixelValue = PixelValue(0);
break;
case ReductionOperation::SUM_SQUARE:
- first_kernel_op = ReductionOperation::SUM_SQUARE;
- last_kernel_op = ReductionOperation::SUM;
+ first_kernel_op = ReductionOperation::SUM_SQUARE;
+ intermediate_kernel_op = ReductionOperation::SUM;
+ last_kernel_op = ReductionOperation::SUM;
+ pixelValue = PixelValue(0);
+ break;
+ case ReductionOperation::PROD:
+ first_kernel_op = ReductionOperation::PROD;
+ intermediate_kernel_op = ReductionOperation::PROD;
+ last_kernel_op = ReductionOperation::PROD;
+ pixelValue = PixelValue(1, input->info()->data_type());
break;
default:
ARM_COMPUTE_ERROR("Not supported");
}
- _reduction_kernels_vector[0].configure(input, _sums_vector.get(), axis, first_kernel_op);
- _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, PixelValue(0));
+ _reduction_kernels_vector[0].configure(input, _results_vector.get(), axis, first_kernel_op);
+ _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
// Apply ReductionOperation on intermediate stages
for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
{
- _memory_group.manage(_sums_vector.get() + i);
- _reduction_kernels_vector[i].configure(_sums_vector.get() + i - 1, _sums_vector.get() + i, axis, ReductionOperation::SUM);
- _border_handlers_vector[i].configure(_sums_vector.get() + i - 1, _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
- _sums_vector[i - 1].allocator()->allocate();
+ _memory_group.manage(_results_vector.get() + i);
+ _reduction_kernels_vector[i].configure(_results_vector.get() + i - 1, _results_vector.get() + i, axis, intermediate_kernel_op);
+ _border_handlers_vector[i].configure(_results_vector.get() + i - 1, _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
+ _results_vector[i - 1].allocator()->allocate();
}
// Apply ReductionOperation on the last stage
const unsigned int last_stage = _num_of_stages - 1;
const unsigned int input_width = input->info()->dimension(0);
- _reduction_kernels_vector[last_stage].configure(_sums_vector.get() + last_stage - 1, output, axis, last_kernel_op, input_width);
- _border_handlers_vector[last_stage].configure(_sums_vector.get() + last_stage - 1, _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, PixelValue(0));
- _sums_vector[last_stage - 1].allocator()->allocate();
- }
- else
- {
- _reduction_kernels_vector[0].configure(input, output, axis, op, 0);
+ _reduction_kernels_vector[last_stage].configure(_results_vector.get() + last_stage - 1, output, axis, last_kernel_op, input_width);
+ _border_handlers_vector[last_stage].configure(_results_vector.get() + last_stage - 1, _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
+ _results_vector[last_stage - 1].allocator()->allocate();
}
}
@@ -188,7 +208,11 @@ void CLReductionOperation::run()
{
_memory_group.acquire();
- if(_reduction_axis == 0 && !_is_quantized)
+ if(_is_serial)
+ {
+ CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
+ }
+ else
{
for(unsigned int i = 0; i < _num_of_stages; ++i)
{
@@ -196,10 +220,6 @@ void CLReductionOperation::run()
CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
}
}
- else
- {
- CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
- }
_memory_group.release();
}
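For intuition, a sketch of how the parallel path shrinks the X dimension stage by stage, assuming only the 128-elements-per-stage factor visible in shape.set(0, ceil(shape.x() / 128.f)) above (illustration only, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        unsigned int width = 2000; // hypothetical input width
        unsigned int stage = 0;
        while(width > 1)
        {
            width = static_cast<unsigned int>(std::ceil(width / 128.f));
            std::printf("after stage %u: width = %u\n", stage++, width);
        }
        // 2000 -> 16 -> 1: two shrinking stages, then the last kernel applies
        // the final op (e.g. MEAN_SUM scaling, or PROD) on what remains.
    }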
diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp
index fa8aaeb5dd..43308fa169 100644
--- a/src/runtime/NEON/functions/NEIntegralImage.cpp
+++ b/src/runtime/NEON/functions/NEIntegralImage.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,5 +36,5 @@ void NEIntegralImage::configure(const ITensor *input, ITensor *output)
auto k = arm_compute::support::cpp14::make_unique<NEIntegralImageKernel>();
k->configure(input, output);
_kernel = std::move(k);
- _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
+ _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
}
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index a9c85bd726..169b9bbf6a 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -167,7 +167,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
ARM_COMPUTE_ERROR("Unsupported interpolation mode");
}
- _border_handler.configure(input, _scale_kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler.configure(input, _scale_kernel.border_size(), border_mode, constant_border_value);
}
Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy,
diff --git a/tests/datasets/ReductionOperationDataset.h b/tests/datasets/ReductionOperationDataset.h
deleted file mode 100644
index dadc4e9249..0000000000
--- a/tests/datasets/ReductionOperationDataset.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_REDUCTION_OPERATION_DATASET_H__
-#define __ARM_COMPUTE_TEST_REDUCTION_OPERATION_DATASET_H__
-
-#include "arm_compute/core/Types.h"
-#include "tests/framework/datasets/ContainerDataset.h"
-#include "utils/TypePrinter.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace datasets
-{
-class ReductionOperations final : public framework::dataset::ContainerDataset<std::vector<ReductionOperation>>
-{
-public:
- ReductionOperations()
- : ContainerDataset("ReductionOperation",
- {
- ReductionOperation::SUM
- })
- {
- }
-};
-} // namespace datasets
-} // namespace test
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_REDUCTION_OPERATION_DATASET_H__ */
diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp
index 2adb4e90d6..c8474e97e6 100644
--- a/tests/validation/CL/ReductionOperation.cpp
+++ b/tests/validation/CL/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,6 @@
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "tests/CL/CLAccessor.h"
#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ReductionOperationDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
@@ -44,10 +43,17 @@ namespace validation
namespace
{
/** Tolerance for float operations */
-AbsoluteTolerance<float> tolerance_f32(0.01f);
+AbsoluteTolerance<float> tolerance_f32(0.001f);
RelativeTolerance<float> rel_tolerance_f32(0.00001f);
AbsoluteTolerance<float> tolerance_f16(0.5f);
RelativeTolerance<float> rel_tolerance_f16(0.2f);
+
+const auto ReductionOperations = framework::dataset::make("ReductionOperation",
+{
+ ReductionOperation::SUM,
+ ReductionOperation::PROD
+});
+
} // namespace
TEST_SUITE(CL)
@@ -89,13 +95,13 @@ using CLReductionOperationFixture = ReductionOperationFixture<CLTensor, CLAccess
TEST_SUITE(Float)
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16);
@@ -103,13 +109,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::D
TEST_SUITE_END() // F16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, tolerance_f32);
diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp
index d064940366..e322947993 100644
--- a/tests/validation/NEON/ReductionOperation.cpp
+++ b/tests/validation/NEON/ReductionOperation.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/runtime/TensorAllocator.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ReductionOperationDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
@@ -48,6 +47,9 @@ AbsoluteTolerance<float> tolerance_f32(0.0001f);
RelativeTolerance<float> rel_tolerance_f32(0.00001f);
/** Tolerance for quantized operations */
RelativeTolerance<float> tolerance_qasymm8(1);
+
+const auto ReductionOperations = framework::dataset::make("ReductionOperation",
+{ ReductionOperation::SUM });
} // namespace
TEST_SUITE(NEON)
@@ -86,13 +88,13 @@ using NEReductionOperationFixture = ReductionOperationFixture<Tensor, Accessor,
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), datasets::ReductionOperations()))
+ combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, tolerance_f32);
@@ -105,7 +107,7 @@ using NEReductionOperationQuantizedFixture = ReductionOperationQuantizedFixture<
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- datasets::ReductionOperations()),
+ ReductionOperations),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
{
// Validate output
@@ -113,7 +115,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationQuantizedFixture<uint8_t>,
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
- datasets::ReductionOperations()),
+ ReductionOperations),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
{
// Validate output
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index fc12e31d75..8e79c3bfb0 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -42,11 +42,11 @@ template <typename T, typename OT>
OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
{
using type = typename std::remove_cv<OT>::type;
- auto res = type(0);
+ auto res = (op == ReductionOperation::PROD) ? type(1) : type(0);
if(std::is_integral<type>::value)
{
- uint32_t int_res = 0;
+ auto int_res = static_cast<uint32_t>(res);
for(int i = 0; i < reduce_elements; ++i)
{
auto elem = *(ptr + stride * i);
@@ -72,6 +72,9 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
case ReductionOperation::SUM:
int_res += elem;
break;
+ case ReductionOperation::PROD:
+ int_res *= elem;
+ break;
default:
ARM_COMPUTE_ERROR("Operation not supported");
}
@@ -108,6 +111,9 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
case ReductionOperation::SUM:
res += elem;
break;
+ case ReductionOperation::PROD:
+ res *= elem;
+ break;
default:
ARM_COMPUTE_ERROR("Operation not supported");
}
@@ -117,7 +123,6 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
res /= reduce_elements;
}
}
-
return res;
}
} // namespace
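The identity-element change above can be summarised with a minimal standalone sketch (illustration only; the real reference code also handles SUM_SQUARE, MEAN_SUM, the argmin/argmax paths and integral accumulation):

    #include <cassert>

    enum class ReductionOperation { SUM, PROD }; // trimmed-down stand-in

    float reduce(const float *ptr, int n, ReductionOperation op)
    {
        // Accumulator starts at the identity: 1 for PROD, 0 otherwise.
        float res = (op == ReductionOperation::PROD) ? 1.f : 0.f;
        for(int i = 0; i < n; ++i)
        {
            if(op == ReductionOperation::PROD)
            {
                res *= ptr[i];
            }
            else
            {
                res += ptr[i];
            }
        }
        return res;
    }

    int main()
    {
        const float v[] = { 2.f, 3.f, 4.f };
        assert(reduce(v, 3, ReductionOperation::SUM) == 9.f);
        assert(reduce(v, 3, ReductionOperation::PROD) == 24.f);
    }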
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 496e49beb1..f2cf606a00 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1397,6 +1397,9 @@ inline ::std::ostream &operator<<(::std::ostream &os, const ReductionOperation &
case ReductionOperation::ARG_IDX_MIN:
os << "ARG_IDX_MIN";
break;
+ case ReductionOperation::PROD:
+ os << "PROD";
+ break;
default:
ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
}