From a25d16c86f0d870408bc8b941aa755093417b0f0 Mon Sep 17 00:00:00 2001
From: Vidhya Sudhan Loganathan
Date: Fri, 16 Nov 2018 11:33:12 +0000
Subject: COMPMID-1266 : Add support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels

Introduced F32 accumulation for the F16 Winograd GEMM and output transform.
WinogradConvolution will be available for F16 only if the fast math flag is enabled.

Change-Id: I215593c205236a0f9669218437bb40b184ec6a4f
---
 tests/SimpleTensor.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'tests/SimpleTensor.h')

diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h
index 335ef9130a..dd4a8bee2c 100644
--- a/tests/SimpleTensor.h
+++ b/tests/SimpleTensor.h
@@ -220,6 +220,45 @@ protected:
     DataLayout _data_layout{ DataLayout::UNKNOWN };
 };
 
+template <typename T1, typename T2>
+SimpleTensor<T1> copy_tensor(const SimpleTensor<T2> &tensor)
+{
+    SimpleTensor<T1> st(tensor.shape(), tensor.data_type(),
+                        tensor.num_channels(),
+                        tensor.quantization_info(),
+                        tensor.data_layout());
+    for(size_t n = 0; n < size_t(st.num_elements()); n++)
+    {
+        st.data()[n] = static_cast<T1>(tensor.data()[n]);
+    }
+    return st;
+}
+
+template <typename T1, typename T2, typename std::enable_if<std::is_same<T1, T2>::value, int>::type = 0>
+SimpleTensor<T1> copy_tensor(const SimpleTensor<T2> &tensor)
+{
+    SimpleTensor<T1> st(tensor.shape(), tensor.data_type(),
+                        tensor.num_channels(),
+                        tensor.quantization_info(),
+                        tensor.data_layout());
+    memcpy((void *)st.data(), (const void *)tensor.data(), size_t(st.num_elements() * sizeof(T1)));
+    return st;
+}
+
+template < typename T1, typename T2, typename std::enable_if < (std::is_same<T1, half>::value || std::is_same<T2, half>::value), int >::type = 0 >
+SimpleTensor<T1> copy_tensor(const SimpleTensor<T2> &tensor)
+{
+    SimpleTensor<T1> st(tensor.shape(), tensor.data_type(),
+                        tensor.num_channels(),
+                        tensor.quantization_info(),
+                        tensor.data_layout());
+    for(size_t n = 0; n < size_t(st.num_elements()); n++)
+    {
+        st.data()[n] = half_float::detail::half_cast<T1, T2>(tensor.data()[n]);
+    }
+    return st;
+}
+
 template <typename T>
 SimpleTensor<T>::SimpleTensor(TensorShape shape, Format format)
     : _buffer(nullptr),
--
cgit v1.2.1
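
For context on how these helpers fit the commit message: the copy_tensor()
overloads let reference code widen an F16 SimpleTensor to F32, run the
Winograd GEMM and output transform at F32, and narrow the result back to
F16 through the half_cast overload. The sketch below is not part of the
patch; it only illustrates why F32 accumulation matters for F16 data. It
assumes the bundled half_float library (half.hpp) that SimpleTensor.h
already uses; the function name dot_with_f32_acc and the include path are
illustrative placeholders.

    #include <cstddef>
    #include <vector>
    #include "half/half.hpp" // half_float::half; adjust to where half.hpp lives

    using half_float::half;

    // Multiply in F32 and keep the running sum in F32, rounding to F16
    // exactly once at the end. Accumulating directly in half rounds after
    // every addition and saturates near 65504 (F16 max), which is the
    // error the F32 accumulation in the Winograd GEMM/output transform
    // avoids. Assumes a and b have the same length.
    half dot_with_f32_acc(const std::vector<half> &a, const std::vector<half> &b)
    {
        float acc = 0.f;
        for(std::size_t i = 0; i < a.size(); ++i)
        {
            acc += static_cast<float>(a[i]) * static_cast<float>(b[i]);
        }
        return static_cast<half>(acc); // single F32 -> F16 rounding
    }

This is also why the feature is gated behind the fast-math flag: the F16
result no longer matches a strict per-element F16 computation bit for bit,
only a more accurate mixed-precision one.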