ArmNN
 20.08
InferenceTestImage.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "InferenceTestImage.hpp"
6 
9 
10 #include <boost/format.hpp>
11 #include <boost/numeric/conversion/cast.hpp>
12 
13 #include <array>
14 
15 #define STB_IMAGE_IMPLEMENTATION
16 #include <stb/stb_image.h>
17 
18 #define STB_IMAGE_RESIZE_IMPLEMENTATION
19 #include <stb/stb_image_resize.h>
20 
21 #define STB_IMAGE_WRITE_IMPLEMENTATION
22 #include <stb/stb_image_write.h>
23 
24 namespace
25 {
26 
27 unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
28 {
29  switch (channelLayout)
30  {
32  return static_cast<unsigned int>(channel);
34  return 2u - static_cast<unsigned int>(channel);
35  default:
36  throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
37  % static_cast<int>(channelLayout)));
38  }
39 }
40 
// Linear interpolation between a and b: w == 0 yields a, w == 1 yields b.
inline float Lerp(float a, float b, float w)
{
    const float weightOfA = 1.f - w;
    return (w * b) + (weightOfA * a);
}
45 
// Stores one channel value into a buffer of interleaved 3-channel pixels laid out row by row.
//
// data:  destination buffer; the caller must have sized it to hold the addressed element.
// width: number of pixels per row.
// x, y:  pixel coordinates.
// c:     channel index within the pixel.
// value: value to store.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    const unsigned int pixelIndex = (y * width) + x;
    const unsigned int elementIndex = (3 * pixelIndex) + c;
    data[elementIndex] = value;
}
55 
56 std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
57  const unsigned int outputWidth,
58  const unsigned int outputHeight,
59  const float scale,
60  const std::array<float, 3>& mean,
61  const std::array<float, 3>& stddev)
62 {
63  std::vector<float> out;
64  out.resize(outputWidth * outputHeight * 3);
65 
66  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
67  // image is projected into the input image to figure out the interpolants and weights. Note that this
68  // will yield different results than if projecting the centre of output texels.
69 
70  const unsigned int inputWidth = image.GetWidth();
71  const unsigned int inputHeight = image.GetHeight();
72 
73  // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
74  // in the input image.
75  const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
76  const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
77 
78  uint8_t rgb_x0y0[3];
79  uint8_t rgb_x1y0[3];
80  uint8_t rgb_x0y1[3];
81  uint8_t rgb_x1y1[3];
82 
83  for (unsigned int y = 0; y < outputHeight; ++y)
84  {
85  // Corresponding real-valued height coordinate in input image.
86  const float iy = boost::numeric_cast<float>(y) * scaleY;
87 
88  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
89  const float fiy = floorf(iy);
90  const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
91 
92  // Interpolation weight (range [0,1])
93  const float yw = iy - fiy;
94 
95  for (unsigned int x = 0; x < outputWidth; ++x)
96  {
97  // Real-valued and discrete width coordinates in input image.
98  const float ix = boost::numeric_cast<float>(x) * scaleX;
99  const float fix = floorf(ix);
100  const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
101 
102  // Interpolation weight (range [0,1]).
103  const float xw = ix - fix;
104 
105  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
106  const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
107  const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
108 
109  std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
110  std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
111  std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
112  std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
113 
114  for (unsigned c=0; c<3; ++c)
115  {
116  const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
117  const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
118  const float l = Lerp(ly0, ly1, yw);
119  PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
120  }
121  }
122  }
123  return out;
124 }
125 
126 } // namespace
127 
129  : m_Width(0u)
130  , m_Height(0u)
131  , m_NumChannels(0u)
132 {
133  int width;
134  int height;
135  int channels;
136 
137  using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
138  StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
139 
140  if (stbData == nullptr)
141  {
142  throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
143  }
144 
145  if (width == 0 || height == 0)
146  {
147  throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
148  }
149 
150  m_Width = boost::numeric_cast<unsigned int>(width);
151  m_Height = boost::numeric_cast<unsigned int>(height);
152  m_NumChannels = boost::numeric_cast<unsigned int>(channels);
153 
154  const unsigned int sizeInBytes = GetSizeInBytes();
155  m_Data.resize(sizeInBytes);
156  memcpy(m_Data.data(), stbData.get(), sizeInBytes);
157 }
158 
159 std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
160 {
161  if (x >= m_Width || y >= m_Height)
162  {
163  throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
164  "Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
165  }
166 
167  const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
168  const uint8_t* const pixelData = m_Data.data() + pixelOffset;
169  ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
170 
171  std::array<uint8_t, 3> outPixelData;
172  outPixelData.fill(0);
173 
174  const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
175  for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
176  {
177  outPixelData[c] = pixelData[c];
178  }
179 
180  return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
181 }
182 
183 
184 void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
185 {
186  std::vector<uint8_t> newData;
187  newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
188 
189  // boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
190  // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
191  // a boost::numeric_cast<>() handled the conversion).
192  const int nW = boost::numeric_cast<int>(newWidth);
193  const int nH = boost::numeric_cast<int>(newHeight);
194 
195  const int w = static_cast<int>(im.GetWidth());
196  const int h = static_cast<int>(im.GetHeight());
197  const int numChannels = static_cast<int>(im.GetNumChannels());
198 
199  const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
200  if (res == 0)
201  {
202  throw InferenceTestImageResizeFailed("The resizing operation failed");
203  }
204 
205  im.m_Data.swap(newData);
206  im.m_Width = newWidth;
207  im.m_Height = newHeight;
208 }
209 
210 std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
211  unsigned int newHeight,
212  const armnn::CheckLocation& location,
213  const ResizingMethods meth,
214  const std::array<float, 3>& mean,
215  const std::array<float, 3>& stddev,
216  const float scale)
217 {
218  std::vector<float> out;
219  if (newWidth == 0 || newHeight == 0)
220  {
221  throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
222  "operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
223  }
224 
225  switch (meth) {
227  {
228  StbResize(*this, newWidth, newHeight);
229  break;
230  }
232  {
233  out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
234  break;
235  }
236  default:
237  throw InferenceTestImageResizeFailed(boost::str(
238  boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
239  % location.AsString()));
240  }
241  return out;
242 }
243 
244 void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
245 {
246  const int w = static_cast<int>(GetWidth());
247  const int h = static_cast<int>(GetHeight());
248  const int numChannels = static_cast<int>(GetNumChannels());
249  int res = 0;
250 
251  switch (format)
252  {
253  case WriteFormat::Png:
254  {
255  res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
256  break;
257  }
258  case WriteFormat::Bmp:
259  {
260  res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
261  break;
262  }
263  case WriteFormat::Tga:
264  {
265  res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
266  break;
267  }
268  default:
269  throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
270  % static_cast<int>(format)));
271  }
272 
273  if (res == 0)
274  {
275  throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
276  % filePath));
277  }
278 }
279 
280 template <typename TProcessValueCallable>
281 std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
282  const InferenceTestImage& image,
283  TProcessValueCallable processValue)
284 {
285  const unsigned int h = image.GetHeight();
286  const unsigned int w = image.GetWidth();
287 
288  std::vector<float> imageData;
289  imageData.resize(h * w * 3);
290 
291  for (unsigned int j = 0; j < h; ++j)
292  {
293  for (unsigned int i = 0; i < w; ++i)
294  {
295  uint8_t r, g, b;
296  std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
297 
298  // ArmNN order: C, H, W
299  const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
300  const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
301  const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
302 
303  imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
304  imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
305  imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
306  }
307  }
308 
309  return imageData;
310 }
311 
313  const InferenceTestImage& image)
314 {
315  return GetImageDataInArmNnLayoutAsFloats(layout, image,
316  [](ImageChannel channel, float value)
317  {
318  armnn::IgnoreUnused(channel);
319  return value / 255.f;
320  });
321 }
322 
324  const InferenceTestImage& image,
325  const std::array<float, 3>& mean)
326 {
327  return GetImageDataInArmNnLayoutAsFloats(layout, image,
328  [layout, &mean](ImageChannel channel, float value)
329  {
330  const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
331  return value - mean[channelIndex];
332  });
333 }
334 
336  const InferenceTestImage& image)
337 {
338  std::vector<float> imageData;
339  const unsigned int h = image.GetHeight();
340  const unsigned int w = image.GetWidth();
341 
342  const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
343  const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
344  const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
345 
346  imageData.resize(h * w * 3);
347  unsigned int offset = 0;
348 
349  for (unsigned int j = 0; j < h; ++j)
350  {
351  for (unsigned int i = 0; i < w; ++i)
352  {
353  uint8_t r, g, b;
354  std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
355 
356  imageData[offset+rDstIndex] = float(r) / 255.0f;
357  imageData[offset+gDstIndex] = float(g) / 255.0f;
358  imageData[offset+bDstIndex] = float(b) / 255.0f;
359  offset += 3;
360  }
361  }
362 
363  return imageData;
364 }
void Write(WriteFormat format, const char *filePath) const
unsigned int GetNumChannels() const
std::string AsString() const
Definition: Exceptions.hpp:29
void IgnoreUnused(Ts &&...)
std::vector< float > GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout, const InferenceTestImage &image)
unsigned int GetWidth() const
std::vector< float > GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout, const InferenceTestImage &image, const std::array< float, 3 > &mean)
void StbResize(InferenceTestImage &im, const unsigned int newWidth, const unsigned int newHeight)
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:33
std::vector< float > GetImageDataAsNormalizedFloats(ImageChannelLayout layout, const InferenceTestImage &image)
std::tuple< uint8_t, uint8_t, uint8_t > GetPixelAs3Channels(unsigned int x, unsigned int y) const
unsigned int GetSizeInBytes() const
unsigned int GetHeight() const
ImageChannelLayout
std::vector< float > Resize(unsigned int newWidth, unsigned int newHeight, const armnn::CheckLocation &location, const ResizingMethods meth=ResizingMethods::STB, const std::array< float, 3 > &mean={{0.0, 0.0, 0.0}}, const std::array< float, 3 > &stddev={{1.0, 1.0, 1.0}}, const float scale=255.0f)
InferenceTestImage(const char *filePath)
std::vector< float > GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout, const InferenceTestImage &image, TProcessValueCallable processValue)