From 2364dcd7241d730021bf68e000e5a6411b9f09d1 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Mon, 26 Apr 2021 11:06:57 -0700 Subject: Initial commit of serialization library code Change-Id: Ie09a7245176aa799e59622e5118b145833b23590 Signed-off-by: Eric Kunze --- src/numpy_utils.cpp | 415 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 src/numpy_utils.cpp (limited to 'src/numpy_utils.cpp') diff --git a/src/numpy_utils.cpp b/src/numpy_utils.cpp new file mode 100644 index 0000000..e438235 --- /dev/null +++ b/src/numpy_utils.cpp @@ -0,0 +1,415 @@ + +// Copyright (c) 2020-2021, ARM Limited. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "numpy_utils.h" + +// Magic NUMPY header +static const char NUMPY_HEADER_STR[] = "\x93NUMPY\x1\x0\x76\x0{"; +static const int NUMPY_HEADER_SZ = 128; + +NumpyUtilities::NPError NumpyUtilities::readFromNpyFile(const char* filename, const uint32_t elems, bool* databuf) +{ + const char dtype_str[] = "'|b1'"; + return readFromNpyFileCommon(filename, dtype_str, 1, elems, databuf, true); +} + +NumpyUtilities::NPError NumpyUtilities::readFromNpyFile(const char* filename, const uint32_t elems, int32_t* databuf) +{ + const char dtype_str[] = "'(databuf); + for (uint32_t i = 0; i < elems; i++) + { + int val = fgetc(infile); + + if (val == EOF) + { + rc = FILE_IO_ERROR; + } + + buf[i] = val; + } + } + else + { + // Now we are at the beginning of the data + // Parse based on the datatype and number of dimensions + if (fread(databuf, elementsize, elems, infile) != elems) + { + rc = FILE_IO_ERROR; + } + } + } + + if (infile) + fclose(infile); + + return rc; +} + +NumpyUtilities::NPError NumpyUtilities::checkNpyHeader(FILE* infile, const uint32_t elems, const char* dtype_str) +{ + char buf[NUMPY_HEADER_SZ + 1]; + char* ptr = nullptr; + NPError rc = NO_ERROR; + bool foundFormat = false; + bool foundOrder = false; + bool foundShape = false; + bool fortranOrder = false; + std::vector shape; + uint32_t totalElems = 1; + char* outer_end = NULL; + + assert(infile); + assert(elems > 0); + + if (fread(buf, NUMPY_HEADER_SZ, 1, infile) != 1) + { + return HEADER_PARSE_ERROR; + } + + if (memcmp(buf, NUMPY_HEADER_STR, sizeof(NUMPY_HEADER_STR) - 1)) + { + return HEADER_PARSE_ERROR; + } + + ptr = strtok_r(buf + sizeof(NUMPY_HEADER_STR) - 1, ":", &outer_end); + + // Read in the data type, order, and shape + while (ptr && (!foundFormat || !foundOrder || !foundShape)) + { + + // End of string? + if (!ptr) + break; + + // Skip whitespace + while (isspace(*ptr)) + ptr++; + + // Parse the dictionary field name + if (!strcmp(ptr, "'descr'")) + { + ptr = strtok_r(NULL, ",", &outer_end); + if (!ptr) + break; + + while (isspace(*ptr)) + ptr++; + + if (strcmp(ptr, dtype_str)) + { + return FILE_TYPE_MISMATCH; + } + + foundFormat = true; + } + else if (!strcmp(ptr, "'fortran_order'")) + { + ptr = strtok_r(NULL, ",", &outer_end); + if (!ptr) + break; + + while (isspace(*ptr)) + ptr++; + + if (!strcmp(ptr, "False")) + { + fortranOrder = false; + } + else + { + return FILE_TYPE_MISMATCH; + } + + foundOrder = true; + } + else if (!strcmp(ptr, "'shape'")) + { + + ptr = strtok_r(NULL, "(", &outer_end); + if (!ptr) + break; + ptr = strtok_r(NULL, ")", &outer_end); + if (!ptr) + break; + + while (isspace(*ptr)) + ptr++; + + // The shape contains N comma-separated integers. Read up to 4. + char* end = NULL; + + ptr = strtok_r(ptr, ",", &end); + for (int i = 0; i < 4; i++) + { + // Out of dimensions + if (!ptr) + break; + + int dim = atoi(ptr); + + // Dimension is 0 + if (dim == 0) + break; + + shape.push_back(dim); + totalElems *= dim; + ptr = strtok_r(NULL, ",", &end); + } + + foundShape = true; + } + else + { + return HEADER_PARSE_ERROR; + } + + if (!ptr) + break; + + ptr = strtok_r(NULL, ":", &outer_end); + } + + if (!foundShape || !foundFormat || !foundOrder) + { + return HEADER_PARSE_ERROR; + } + + // Validate header + if (fortranOrder) + { + return FILE_TYPE_MISMATCH; + } + + if (totalElems != elems) + { + return BUFFER_SIZE_MISMATCH; + } + + // Go back to the begininng and read until the end of the header dictionary + rewind(infile); + int val; + + do + { + val = fgetc(infile); + } while (val != EOF && val != '\n'); + + return rc; +} + +NumpyUtilities::NPError NumpyUtilities::writeToNpyFile(const char* filename, const uint32_t elems, const bool* databuf) +{ + std::vector shape = { (int32_t)elems }; + return writeToNpyFile(filename, shape, databuf); +} + +NumpyUtilities::NPError + NumpyUtilities::writeToNpyFile(const char* filename, const std::vector& shape, const bool* databuf) +{ + const char dtype_str[] = "'|b1'"; + return writeToNpyFileCommon(filename, dtype_str, 1, shape, databuf, true); // bools written as size 1 +} + +NumpyUtilities::NPError + NumpyUtilities::writeToNpyFile(const char* filename, const uint32_t elems, const int32_t* databuf) +{ + std::vector shape = { (int32_t)elems }; + return writeToNpyFile(filename, shape, databuf); +} + +NumpyUtilities::NPError + NumpyUtilities::writeToNpyFile(const char* filename, const std::vector& shape, const int32_t* databuf) +{ + const char dtype_str[] = "' shape = { (int32_t)elems }; + return writeToNpyFile(filename, shape, databuf); +} + +NumpyUtilities::NPError + NumpyUtilities::writeToNpyFile(const char* filename, const std::vector& shape, const int64_t* databuf) +{ + const char dtype_str[] = "' shape = { (int32_t)elems }; + return writeToNpyFile(filename, shape, databuf); +} + +NumpyUtilities::NPError + NumpyUtilities::writeToNpyFile(const char* filename, const std::vector& shape, const float* databuf) +{ + const char dtype_str[] = "'& shape, + const void* databuf, + bool bool_translate) +{ + FILE* outfile = nullptr; + NPError rc = NO_ERROR; + uint32_t totalElems = 1; + + assert(filename); + assert(shape.size() >= 0); + assert(databuf); + + outfile = fopen(filename, "wb"); + + if (!outfile) + { + return FILE_NOT_FOUND; + } + + for (uint32_t i = 0; i < shape.size(); i++) + { + totalElems *= shape[i]; + } + + rc = writeNpyHeader(outfile, shape, dtype_str); + + if (rc == NO_ERROR) + { + if (bool_translate) + { + // Numpy save format stores booleans as a byte array + // with one byte per boolean. This somewhat inefficiently + // remaps from system bool[] to this format. + const bool* buf = reinterpret_cast(databuf); + for (uint32_t i = 0; i < totalElems; i++) + { + int val = buf[i] ? 1 : 0; + if (fputc(val, outfile) == EOF) + { + rc = FILE_IO_ERROR; + } + } + } + else + { + if (fwrite(databuf, elementsize, totalElems, outfile) != totalElems) + { + rc = FILE_IO_ERROR; + } + } + } + + if (outfile) + fclose(outfile); + + return rc; +} + +NumpyUtilities::NPError + NumpyUtilities::writeNpyHeader(FILE* outfile, const std::vector& shape, const char* dtype_str) +{ + NPError rc = NO_ERROR; + uint32_t i; + char header[NUMPY_HEADER_SZ + 1]; + int headerPos = 0; + + assert(outfile); + assert(shape.size() >= 0); + + // Space-fill the header and end with a newline to start per numpy spec + memset(header, 0x20, NUMPY_HEADER_SZ); + header[NUMPY_HEADER_SZ - 1] = '\n'; + header[NUMPY_HEADER_SZ] = 0; + + // Write out the hard-coded header. We only support a 128-byte 1.0 header + // for now, which should be sufficient for simple tensor types of any + // reasonable rank. + memcpy(header, NUMPY_HEADER_STR, sizeof(NUMPY_HEADER_STR) - 1); + headerPos += sizeof(NUMPY_HEADER_STR) - 1; + + // Output the format dictionary + // Hard-coded for I32 for now + headerPos += + snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, "'descr': %s, 'fortran_order': False, 'shape': (%d,", + dtype_str, shape.empty() ? 1 : shape[0]); + + // Remainder of shape array + for (i = 1; i < shape.size(); i++) + { + headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, " %d,", shape[i]); + } + + // Close off the dictionary + headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, "), }"); + + // snprintf leaves a NULL at the end. Replace with a space + header[headerPos] = 0x20; + + if (fwrite(header, NUMPY_HEADER_SZ, 1, outfile) != 1) + { + rc = FILE_IO_ERROR; + } + + return rc; +} -- cgit v1.2.1