diff options
10 files changed, 216 insertions, 81 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h index faff59563b..c7f59e9eba 100644 --- a/arm_compute/core/NEON/NEAsymm.h +++ b/arm_compute/core/NEON/NEAsymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -124,6 +124,66 @@ uint8x16_t finalize_quantization(int32x4x4_t &in_s32, return out_u8; } + +/** Dequantize a neon vector holding 16 quantized values. + * + * @param qv Input values to be dequantized. + * @param qi Quantization information to be used in the computation. + * + * @return Dequantized values in a neon vector + */ +inline float32x4x4_t vdequantize(const uint8x16_t &qv, const QuantizationInfo &qi) +{ + const float scale = qi.scale; + const int offset = qi.offset; + const int32x4_t voffset = vdupq_n_s32(offset); + const float32x4_t vscale = vdupq_n_f32(scale); + const float32x4x4_t vdequantized_input = + { + { + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), + } + }; + return vdequantized_input; +} + +/** Quantize a neon vector holding 16 floating point values. + * + * @param qv Input values to be quantized. + * @param qi Quantization information to be used in the computation. + * + * @return A neon vector holding the quantized values + */ +inline uint8x16_t vquantize(const float32x4x4_t &qv, const QuantizationInfo &qi) +{ + const float scale = qi.scale; + const int offset = qi.offset; + const float32x4_t voffset = vdupq_n_f32(offset); + const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); + const int32x4x4_t rf = + { + { +#ifdef __aarch64__ + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), +#else //__aarch64__ + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), +#endif //__aarch64__ + } + }; + const uint8x8_t pa = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); + const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3]))); + return vcombine_u8(pa, pb); +} + } // namespace arm_compute #include "arm_compute/core/NEON/NEAsymm.inl" #endif // __ARM_COMPUTE_NEASYMM_H__ diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 8c875cdb2d..8352c94586 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" @@ -57,14 +58,30 @@ void depth_concat(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, Iterator input(in, window); Iterator output(out, window); - execute_window_loop(window, [&](const Coordinates & id) + const DataType dt = in->info()->data_type(); + const QuantizationInfo &input_qinfo = in->info()->quantization_info(); + const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { - const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset()); - const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset()); - - wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); - }, - input, output); + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const uint8_t *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<uint8_t *>(output_ptr + output.offset()); + vst1q_u8(out_ptr, vquantize(vdequantize(vld1q_u8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset()); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } } std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index a84a6d9028..ca27a26493 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" @@ -110,15 +111,28 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - - execute_window_loop(window, [&](const Coordinates & id) + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); + const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { - const auto in_ptr = input.ptr(); - const auto out_ptr = output_ptr + output.offset(); - - wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); - }, - input, output); + execute_window_loop(window, [&](const Coordinates &) + { + vst1q_u8(output_ptr + output.offset(), vquantize(vdequantize(vld1q_u8(input.ptr()), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = input.ptr(); + const auto out_ptr = output_ptr + output.offset(); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } } diff --git a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp index 097605c062..7e435c34b1 100644 --- a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,7 +66,7 @@ void NEWidthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, IT _num_inputs = inputs_vector.size(); std::vector<ITensorInfo *> inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) + for(unsigned int i = 0; i < _num_inputs; ++i) { inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); } @@ -80,7 +80,7 @@ void NEWidthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, IT _concat_kernels_vector = arm_compute::support::cpp14::make_unique<NEWidthConcatenateLayerKernel[]>(_num_inputs); - for(unsigned int i = 0; i < _num_inputs; i++) + for(unsigned int i = 0; i < _num_inputs; ++i) { _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output); width_offset += inputs_vector.at(i)->info()->dimension(0); @@ -89,7 +89,7 @@ void NEWidthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, IT void NEWidthConcatenateLayer::run() { - for(unsigned i = 0; i < _num_inputs; i++) + for(unsigned i = 0; i < _num_inputs; ++i) { NEScheduler::get().schedule(_concat_kernels_vector.get() + i, Window::DimY); } diff --git a/tests/validation/fixtures/DepthConcatenateLayerFixture.h b/tests/validation/fixtures/DepthConcatenateLayerFixture.h index 5fdfacbb76..edeefa228a 100644 --- a/tests/validation/fixtures/DepthConcatenateLayerFixture.h +++ b/tests/validation/fixtures/DepthConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,9 +53,22 @@ public: // Create input shapes std::mt19937 gen(library->seed()); std::uniform_int_distribution<> num_dis(2, 4); - const int num_tensors = num_dis(gen); + std::uniform_int_distribution<> offset_dis(0, 20); + + const int num_tensors = num_dis(gen); + + std::vector<TensorShape> shapes(num_tensors, shape); + + // vector holding the quantization info: + // the last element is the output quantization info + // all other elements are the quantization info for the input tensors + std::vector<QuantizationInfo> qinfo(num_tensors + 1, QuantizationInfo()); + + for(auto &qi : qinfo) + { + qi = QuantizationInfo(1.f / 255.f, offset_dis(gen)); + } - std::vector<TensorShape> shapes(num_tensors, shape); std::uniform_int_distribution<> depth_dis(1, 3); std::bernoulli_distribution mutate_dis(0.5f); std::uniform_real_distribution<> change_dis(-0.25f, 0.f); @@ -82,8 +95,8 @@ public: } } - _target = compute_target(shapes, data_type); - _reference = compute_reference(shapes, data_type); + _target = compute_target(shapes, qinfo, data_type); + _reference = compute_reference(shapes, qinfo, data_type); } protected: @@ -93,7 +106,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(std::vector<TensorShape> shapes, DataType data_type) + TensorType compute_target(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) { std::vector<TensorType> srcs; std::vector<ITensorType *> src_ptrs; @@ -101,14 +114,14 @@ protected: // Create tensors srcs.reserve(shapes.size()); - for(const auto &shape : shapes) + for(size_t j = 0; j < shapes.size(); ++j) { - srcs.emplace_back(create_tensor<TensorType>(shape, data_type, 1)); + srcs.emplace_back(create_tensor<TensorType>(shapes[j], data_type, 1, qinfo[j])); src_ptrs.emplace_back(&srcs.back()); } TensorShape dst_shape = misc::shape_calculator::calculate_depth_concatenate_shape(src_ptrs); - TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1); + TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]); // Create and configure function FunctionType depth_concat; @@ -144,19 +157,21 @@ protected: return dst; } - SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, DataType data_type) + SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) { std::vector<SimpleTensor<T>> srcs; // Create and fill tensors - int i = 0; - for(const auto &shape : shapes) + for(size_t j = 0; j < shapes.size(); ++j) { - srcs.emplace_back(shape, data_type, 1); - fill(srcs.back(), i++); + srcs.emplace_back(shapes[j], data_type, 1, qinfo[j]); + fill(srcs.back(), j); } - return reference::depthconcatenate_layer<T>(srcs); + const TensorShape dst_shape = calculate_depth_concatenate_shape(shapes); + SimpleTensor<T> dst{ dst_shape, data_type, 1, qinfo[shapes.size()] }; + + return reference::depthconcatenate_layer<T>(srcs, dst); } TensorType _target{}; diff --git a/tests/validation/fixtures/WidthConcatenateLayerFixture.h b/tests/validation/fixtures/WidthConcatenateLayerFixture.h index 1f79210350..47a03ed865 100644 --- a/tests/validation/fixtures/WidthConcatenateLayerFixture.h +++ b/tests/validation/fixtures/WidthConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,9 +53,20 @@ public: // Create input shapes std::mt19937 gen(library->seed()); std::uniform_int_distribution<> num_dis(2, 8); - const int num_tensors = num_dis(gen); + std::uniform_int_distribution<> offset_dis(0, 20); - std::vector<TensorShape> shapes(num_tensors, shape); + const int num_tensors = num_dis(gen); + + std::vector<TensorShape> shapes(num_tensors, shape); + + // vector holding the quantization info: + // the last element is the output quantization info + // all other elements are the quantization info for the input tensors + std::vector<QuantizationInfo> qinfo(num_tensors + 1, QuantizationInfo()); + for(auto &qi : qinfo) + { + qi = QuantizationInfo(1.f / 255.f, offset_dis(gen)); + } std::bernoulli_distribution mutate_dis(0.5f); std::uniform_real_distribution<> change_dis(-0.25f, 0.f); @@ -71,8 +82,8 @@ public: } } - _target = compute_target(shapes, data_type); - _reference = compute_reference(shapes, data_type); + _target = compute_target(shapes, qinfo, data_type); + _reference = compute_reference(shapes, qinfo, data_type); } protected: @@ -82,7 +93,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(std::vector<TensorShape> shapes, DataType data_type) + TensorType compute_target(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) { std::vector<TensorType> srcs; std::vector<ITensorType *> src_ptrs; @@ -90,14 +101,15 @@ protected: // Create tensors srcs.reserve(shapes.size()); - for(const auto &shape : shapes) + for(size_t j = 0; j < shapes.size(); ++j) { - srcs.emplace_back(create_tensor<TensorType>(shape, data_type, 1)); + srcs.emplace_back(create_tensor<TensorType>(shapes[j], data_type, 1, qinfo[j])); src_ptrs.emplace_back(&srcs.back()); } TensorShape dst_shape = misc::shape_calculator::calculate_width_concatenate_shape(src_ptrs); - TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1); + + TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]); // Create and configure function FunctionType width_concat; @@ -133,19 +145,21 @@ protected: return dst; } - SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, DataType data_type) + SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) { std::vector<SimpleTensor<T>> srcs; // Create and fill tensors - int i = 0; - for(const auto &shape : shapes) + for(size_t j = 0; j < shapes.size(); ++j) { - srcs.emplace_back(shape, data_type, 1); - fill(srcs.back(), i++); + srcs.emplace_back(shapes[j], data_type, 1, qinfo[j]); + fill(srcs.back(), j); } - return reference::widthconcatenate_layer<T>(srcs); + const TensorShape dst_shape = calculate_width_concatenate_shape(shapes); + SimpleTensor<T> dst{ dst_shape, data_type, 1, qinfo[shapes.size()] }; + + return reference::widthconcatenate_layer<T>(srcs, dst); } TensorType _target{}; diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp index 90fbd915b1..6551f0c79e 100644 --- a/tests/validation/reference/DepthConcatenateLayer.cpp +++ b/tests/validation/reference/DepthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) +SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst) { // Create reference std::vector<TensorShape> shapes; @@ -44,10 +44,6 @@ SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) shapes.emplace_back(src.shape()); } - DataType dst_type = srcs.empty() ? DataType::UNKNOWN : srcs[0].data_type(); - TensorShape dst_shape = calculate_depth_concatenate_shape(shapes); - SimpleTensor<T> dst(dst_shape, dst_type); - // Compute reference int depth_offset = 0; const int width_out = dst.shape().x(); @@ -80,8 +76,20 @@ SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) { for(int r = 0; r < height; ++r) { - std::copy(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out); - src_ptr += width; + if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info()) + { + std::transform(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out, [src, dst](T t) + { + const float dequantized_input = src.quantization_info().dequantize(t); + return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP); + }); + src_ptr += width; + } + else + { + std::copy(src_ptr, src_ptr + width, dst.data() + offset_to_first_element + d * out_stride_z + r * width_out); + src_ptr += width; + } } } } @@ -92,9 +100,9 @@ SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) return dst; } -template SimpleTensor<uint8_t> depthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs); -template SimpleTensor<float> depthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs); -template SimpleTensor<half> depthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs); +template SimpleTensor<uint8_t> depthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst); +template SimpleTensor<float> depthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst); +template SimpleTensor<half> depthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DepthConcatenateLayer.h b/tests/validation/reference/DepthConcatenateLayer.h index 3c486a8015..8a78441651 100644 --- a/tests/validation/reference/DepthConcatenateLayer.h +++ b/tests/validation/reference/DepthConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs); +SimpleTensor<T> depthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp index 6be171b64d..38543393ce 100644 --- a/tests/validation/reference/WidthConcatenateLayer.cpp +++ b/tests/validation/reference/WidthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) +SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst) { // Create reference std::vector<TensorShape> shapes; @@ -44,10 +44,6 @@ SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) shapes.emplace_back(src.shape()); } - DataType dst_type = srcs.empty() ? DataType::UNKNOWN : srcs[0].data_type(); - TensorShape dst_shape = calculate_width_concatenate_shape(shapes); - SimpleTensor<T> dst(dst_shape, dst_type); - // Compute reference int width_offset = 0; const int width_out = dst.shape().x(); @@ -74,21 +70,32 @@ SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs) for(int r = 0; r < height; ++r) { const int offset = u * height * depth + d * height + r; - std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out); - src_ptr += width; + if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info()) + { + std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [src, dst](T t) + { + const float dequantized_input = src.quantization_info().dequantize(t); + return dst.quantization_info().quantize(dequantized_input, RoundingPolicy::TO_NEAREST_UP); + }); + src_ptr += width; + } + else + { + std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out); + src_ptr += width; + } } } } - width_offset += width; } return dst; } -template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs); -template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs); -template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs); +template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst); +template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst); +template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/WidthConcatenateLayer.h index 237e72b947..0f1f428f10 100644 --- a/tests/validation/reference/WidthConcatenateLayer.h +++ b/tests/validation/reference/WidthConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs); +SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst); } // namespace reference } // namespace validation } // namespace test |