ArmNN
 22.05
InferenceTestImage.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "InferenceTestImage.hpp"
6 
10 
11 #include <fmt/format.h>
12 
13 #include <array>
14 
15 #define STB_IMAGE_IMPLEMENTATION
16 #include <stb/stb_image.h>
17 
18 #define STB_IMAGE_RESIZE_IMPLEMENTATION
19 #include <stb/stb_image_resize.h>
20 
21 #define STB_IMAGE_WRITE_IMPLEMENTATION
22 #include <stb/stb_image_write.h>
23 
24 namespace
25 {
26 
27 unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
28 {
29  switch (channelLayout)
30  {
32  return static_cast<unsigned int>(channel);
34  return 2u - static_cast<unsigned int>(channel);
35  default:
36  throw UnknownImageChannelLayout(fmt::format("Unknown layout {}", static_cast<int>(channelLayout)));
37  }
38 }
39 
/// Linear interpolation between two values.
///
/// @param a Value returned when @p w is 0.
/// @param b Value returned when @p w is 1.
/// @param w Interpolation weight.
/// @return w * b + (1 - w) * a.
inline float Lerp(float a, float b, float w)
{
    return w * b + (1.f - w) * a;
}
44 
/// Stores one channel value of one pixel into an interleaved, row-major, 3-channel float buffer.
///
/// @param data  Destination buffer; must already be large enough for the computed index.
/// @param width Number of pixels per row of the image stored in @p data.
/// @param x     Pixel column.
/// @param y     Pixel row.
/// @param c     Channel index within the pixel.
/// @param value Value to store.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    // The buffer always holds three interleaved channels per pixel.
    constexpr unsigned int channelsPerPixel = 3;
    data[(channelsPerPixel*((y*width)+x)) + c] = value;
}
54 
55 std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
56  const unsigned int outputWidth,
57  const unsigned int outputHeight,
58  const float scale,
59  const std::array<float, 3>& mean,
60  const std::array<float, 3>& stddev)
61 {
62  std::vector<float> out;
63  out.resize(outputWidth * outputHeight * 3);
64 
65  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
66  // image is projected into the input image to figure out the interpolants and weights. Note that this
67  // will yield different results than if projecting the centre of output texels.
68 
69  const unsigned int inputWidth = image.GetWidth();
70  const unsigned int inputHeight = image.GetHeight();
71 
72  // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
73  // in the input image.
74  const float scaleY = armnn::numeric_cast<float>(inputHeight) / armnn::numeric_cast<float>(outputHeight);
75  const float scaleX = armnn::numeric_cast<float>(inputWidth) / armnn::numeric_cast<float>(outputWidth);
76 
77  uint8_t rgb_x0y0[3];
78  uint8_t rgb_x1y0[3];
79  uint8_t rgb_x0y1[3];
80  uint8_t rgb_x1y1[3];
81 
82  for (unsigned int y = 0; y < outputHeight; ++y)
83  {
84  // Corresponding real-valued height coordinate in input image.
85  const float iy = armnn::numeric_cast<float>(y) * scaleY;
86 
87  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
88  const float fiy = floorf(iy);
89  const unsigned int y0 = armnn::numeric_cast<unsigned int>(fiy);
90 
91  // Interpolation weight (range [0,1])
92  const float yw = iy - fiy;
93 
94  for (unsigned int x = 0; x < outputWidth; ++x)
95  {
96  // Real-valued and discrete width coordinates in input image.
97  const float ix = armnn::numeric_cast<float>(x) * scaleX;
98  const float fix = floorf(ix);
99  const unsigned int x0 = armnn::numeric_cast<unsigned int>(fix);
100 
101  // Interpolation weight (range [0,1]).
102  const float xw = ix - fix;
103 
104  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
105  const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
106  const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
107 
108  std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
109  std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
110  std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
111  std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
112 
113  for (unsigned c=0; c<3; ++c)
114  {
115  const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
116  const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
117  const float l = Lerp(ly0, ly1, yw);
118  PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
119  }
120  }
121  }
122  return out;
123 }
124 
125 } // namespace
126 
128  : m_Width(0u)
129  , m_Height(0u)
130  , m_NumChannels(0u)
131 {
132  int width;
133  int height;
134  int channels;
135 
136  using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
137  StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
138 
139  if (stbData == nullptr)
140  {
141  throw InferenceTestImageLoadFailed(fmt::format("Could not load the image at {}", filePath));
142  }
143 
144  if (width == 0 || height == 0)
145  {
146  throw InferenceTestImageLoadFailed(fmt::format("Could not load empty image at {}", filePath));
147  }
148 
149  m_Width = armnn::numeric_cast<unsigned int>(width);
150  m_Height = armnn::numeric_cast<unsigned int>(height);
151  m_NumChannels = armnn::numeric_cast<unsigned int>(channels);
152 
153  const unsigned int sizeInBytes = GetSizeInBytes();
154  m_Data.resize(sizeInBytes);
155  memcpy(m_Data.data(), stbData.get(), sizeInBytes);
156 }
157 
158 std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
159 {
160  if (x >= m_Width || y >= m_Height)
161  {
162  throw InferenceTestImageOutOfBoundsAccess(fmt::format("Attempted out of bounds image access. "
163  "Requested ({0}, {1}). Maximum valid coordinates ({2}, {3}).", x, y, (m_Width - 1), (m_Height - 1)));
164  }
165 
166  const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
167  const uint8_t* const pixelData = m_Data.data() + pixelOffset;
168  ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
169 
170  std::array<uint8_t, 3> outPixelData;
171  outPixelData.fill(0);
172 
173  const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
174  for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
175  {
176  outPixelData[c] = pixelData[c];
177  }
178 
179  return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
180 }
181 
182 
183 void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
184 {
185  std::vector<uint8_t> newData;
186  newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
187 
188  // armnn::numeric_cast<>() is used for user-provided data (protecting about overflows).
189  // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
190  // a armnn::numeric_cast<>() handled the conversion).
191  const int nW = armnn::numeric_cast<int>(newWidth);
192  const int nH = armnn::numeric_cast<int>(newHeight);
193 
194  const int w = static_cast<int>(im.GetWidth());
195  const int h = static_cast<int>(im.GetHeight());
196  const int numChannels = static_cast<int>(im.GetNumChannels());
197 
198  const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
199  if (res == 0)
200  {
201  throw InferenceTestImageResizeFailed("The resizing operation failed");
202  }
203 
204  im.m_Data.swap(newData);
205  im.m_Width = newWidth;
206  im.m_Height = newHeight;
207 }
208 
209 std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
210  unsigned int newHeight,
211  const armnn::CheckLocation& location,
212  const ResizingMethods meth,
213  const std::array<float, 3>& mean,
214  const std::array<float, 3>& stddev,
215  const float scale)
216 {
217  std::vector<float> out;
218  if (newWidth == 0 || newHeight == 0)
219  {
220  throw InferenceTestImageResizeFailed(fmt::format("None of the dimensions passed to a resize "
221  "operation can be zero. Requested width: {0}. Requested height: {1}.", newWidth, newHeight));
222  }
223 
224  switch (meth) {
226  {
227  StbResize(*this, newWidth, newHeight);
228  break;
229  }
231  {
232  out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
233  break;
234  }
235  default:
236  throw InferenceTestImageResizeFailed(fmt::format("Unknown resizing method asked ArmNN only"
237  " supports {STB, BilinearAndNormalized} {}",
238  location.AsString()));
239  }
240  return out;
241 }
242 
243 void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
244 {
245  const int w = static_cast<int>(GetWidth());
246  const int h = static_cast<int>(GetHeight());
247  const int numChannels = static_cast<int>(GetNumChannels());
248  int res = 0;
249 
250  switch (format)
251  {
252  case WriteFormat::Png:
253  {
254  res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
255  break;
256  }
257  case WriteFormat::Bmp:
258  {
259  res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
260  break;
261  }
262  case WriteFormat::Tga:
263  {
264  res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
265  break;
266  }
267  default:
268  throw InferenceTestImageWriteFailed(fmt::format("Unknown format {}", static_cast<int>(format)));
269  }
270 
271  if (res == 0)
272  {
273  throw InferenceTestImageWriteFailed(fmt::format("An error occurred when writing to file {}",
274  filePath));
275  }
276 }
277 
278 template <typename TProcessValueCallable>
279 std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
280  const InferenceTestImage& image,
281  TProcessValueCallable processValue)
282 {
283  const unsigned int h = image.GetHeight();
284  const unsigned int w = image.GetWidth();
285 
286  std::vector<float> imageData;
287  imageData.resize(h * w * 3);
288 
289  for (unsigned int j = 0; j < h; ++j)
290  {
291  for (unsigned int i = 0; i < w; ++i)
292  {
293  uint8_t r, g, b;
294  std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
295 
296  // ArmNN order: C, H, W
297  const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
298  const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
299  const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
300 
301  imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
302  imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
303  imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
304  }
305  }
306 
307  return imageData;
308 }
309 
311  const InferenceTestImage& image)
312 {
313  return GetImageDataInArmNnLayoutAsFloats(layout, image,
314  [](ImageChannel channel, float value)
315  {
316  armnn::IgnoreUnused(channel);
317  return value / 255.f;
318  });
319 }
320 
322  const InferenceTestImage& image,
323  const std::array<float, 3>& mean)
324 {
325  return GetImageDataInArmNnLayoutAsFloats(layout, image,
326  [layout, &mean](ImageChannel channel, float value)
327  {
328  const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
329  return value - mean[channelIndex];
330  });
331 }
332 
334  const InferenceTestImage& image)
335 {
336  std::vector<float> imageData;
337  const unsigned int h = image.GetHeight();
338  const unsigned int w = image.GetWidth();
339 
340  const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
341  const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
342  const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
343 
344  imageData.resize(h * w * 3);
345  unsigned int offset = 0;
346 
347  for (unsigned int j = 0; j < h; ++j)
348  {
349  for (unsigned int i = 0; i < w; ++i)
350  {
351  uint8_t r, g, b;
352  std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
353 
354  imageData[offset+rDstIndex] = float(r) / 255.0f;
355  imageData[offset+gDstIndex] = float(g) / 255.0f;
356  imageData[offset+bDstIndex] = float(b) / 255.0f;
357  offset += 3;
358  }
359  }
360 
361  return imageData;
362 }
void Write(WriteFormat format, const char *filePath) const
unsigned int GetNumChannels() const
std::string AsString() const
Definition: Exceptions.hpp:29
void IgnoreUnused(Ts &&...)
std::vector< float > GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout, const InferenceTestImage &image)
unsigned int GetWidth() const
std::vector< float > GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout, const InferenceTestImage &image, const std::array< float, 3 > &mean)
void StbResize(InferenceTestImage &im, const unsigned int newWidth, const unsigned int newHeight)
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::vector< float > GetImageDataAsNormalizedFloats(ImageChannelLayout layout, const InferenceTestImage &image)
std::tuple< uint8_t, uint8_t, uint8_t > GetPixelAs3Channels(unsigned int x, unsigned int y) const
unsigned int GetSizeInBytes() const
unsigned int GetHeight() const
ImageChannelLayout
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
std::vector< float > Resize(unsigned int newWidth, unsigned int newHeight, const armnn::CheckLocation &location, const ResizingMethods meth=ResizingMethods::STB, const std::array< float, 3 > &mean={{0.0, 0.0, 0.0}}, const std::array< float, 3 > &stddev={{1.0, 1.0, 1.0}}, const float scale=255.0f)
InferenceTestImage(const char *filePath)
std::vector< float > GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout, const InferenceTestImage &image, TProcessValueCallable processValue)