aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-11-19 11:56:51 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2018-11-22 12:41:32 +0000
commit303f0dbebf631b3db00d9d64e71018abbbe9d4fe (patch)
tree631e70c9a8141f1262752829a64b3e33c7f1ee93 /src
parent9d3a831d4131f8a8b37f127f11d36848d33e8496 (diff)
downloadComputeLibrary-303f0dbebf631b3db00d9d64e71018abbbe9d4fe.tar.gz
COMPMID-1718: Extend DepthConvert to support Cast
Change-Id: I6ee2c0b670727fc808fa636c53ddfaec3a0036c9
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/depth_convert.cl6
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.cpp57
-rw-r--r--src/core/NEON/kernels/NESoftmaxLayerKernel.cpp4
-rw-r--r--src/runtime/CL/functions/CLCast.cpp44
-rw-r--r--src/runtime/CL/functions/CLDepthConvertLayer.cpp5
5 files changed, 69 insertions, 47 deletions
diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl
index 611449e614..7b03273b7b 100644
--- a/src/core/CL/cl_kernels/depth_convert.cl
+++ b/src/core/CL/cl_kernels/depth_convert.cl
@@ -69,8 +69,7 @@ __kernel void convert_depth_down(
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
#if defined(IS_DATA_TYPE_FLOAT)
- const DATA_TYPE_IN scale = (DATA_TYPE_IN)(1 << shift);
- vstore16(CONVERT_DOWN(in_data / scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
+ vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else /* defined(IS_DATA_TYPE_FLOAT) */
vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#endif /* defined(IS_DATA_TYPE_FLOAT) */
@@ -109,8 +108,7 @@ __kernel void convert_depth_up(
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
#if defined(IS_DATA_TYPE_FLOAT)
- const DATA_TYPE_OUT scale = (DATA_TYPE_OUT)(1 << shift);
- vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) * scale, 0, (__global DATA_TYPE_OUT *)out.ptr);
+ vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else /* defined(IS_DATA_TYPE_FLOAT) */
vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
#endif /* defined(IS_DATA_TYPE_FLOAT) */
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index ffbd295646..b0c21624ed 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -37,8 +37,8 @@
#include <set>
#include <string>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
@@ -46,42 +46,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON(input == output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input,
+ 1,
+ DataType::U8, DataType::S8, DataType::S16,
+ DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
+ 1,
+ DataType::U8, DataType::S8, DataType::S16,
+ DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), "Input and output data types must be different");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_float(input->data_type()) && shift != 0, "Shift is used only with integer inputs");
ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
- // Check if convertion is supported
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::U16 && output->data_type() != DataType::S16
- && output->data_type() != DataType::U32 && output->data_type() != DataType::S32),
- "Only data types supported [in] U8 -> [out] U16, S16, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
- && output->data_type() != DataType::S32),
- "Only data types supported [in] U16 -> [out] U8, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
- && output->data_type() != DataType::S32),
- "Only data types supported [in] S16 -> [out] U8, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
- && output->data_type() != DataType::S16),
- "Only data types supported [in] U32 -> [out] U8, U16, S16");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
- && output->data_type() != DataType::S16),
- "Only data types supported [in] S32 -> [out] U8, U16, S16");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
- "Only data types supported [in] F16 -> [out] F32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
- "Only data types supported [in] F32 -> [out] F16");
-
// Validate in case of configured output
if(output->total_size() > 0)
{
@@ -109,12 +87,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
- build_opts.add_option_if(input_size > output_size, ((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE");
- build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
+ // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
+ build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || policy == ConvertPolicy::SATURATE, "-DSATURATE");
+ build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || is_data_type_float(output->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
// Create kernel
- const std::string kernel_name = (input_size > output_size) ? "convert_depth_down" : "convert_depth_up";
+ const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up";
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Set shift arg
@@ -132,3 +110,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens
return Status{};
}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 0f416defab..e9417ece44 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -34,7 +34,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Utility.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
#include <algorithm>
#include <arm_neon.h>
@@ -667,7 +667,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
/* Run remaining elements */
for(; i < input_width; ++i)
{
- out_ptr[i] = utility::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
+ out_ptr[i] = utils::cast::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
}
}
},
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
new file mode 100644
index 0000000000..e0ffcdb09b
--- /dev/null
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
+ k->configure(input, output, policy, 0);
+ _kernel = std::move(k);
+}
+
+Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
+{
+ return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 2e52e8aadc..dbf71ac1e0 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -28,8 +28,8 @@
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
@@ -41,3 +41,4 @@ Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo
{
return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
}
+} // namespace arm_compute