From 145c88f851d12d2cadc2f080d232c1d5963d6e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89anna=20=C3=93=20Cath=C3=A1in?= Date: Mon, 16 Nov 2020 14:12:11 +0000 Subject: MLECO-1253 Adding ASR sample application using the PyArmNN api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I450b23800ca316a5bfd4608c8559cf4f11271c21 Signed-off-by: Éanna Ó Catháin --- python/pyarmnn/examples/common/cv_utils.py | 184 +++++++++++++++++++++ python/pyarmnn/examples/common/network_executor.py | 108 ++++++++++++ python/pyarmnn/examples/common/tests/conftest.py | 40 +++++ python/pyarmnn/examples/common/tests/context.py | 7 + .../examples/common/tests/test_network_executor.py | 24 +++ python/pyarmnn/examples/common/tests/test_utils.py | 19 +++ python/pyarmnn/examples/common/utils.py | 41 +++++ 7 files changed, 423 insertions(+) create mode 100644 python/pyarmnn/examples/common/cv_utils.py create mode 100644 python/pyarmnn/examples/common/network_executor.py create mode 100644 python/pyarmnn/examples/common/tests/conftest.py create mode 100644 python/pyarmnn/examples/common/tests/context.py create mode 100644 python/pyarmnn/examples/common/tests/test_network_executor.py create mode 100644 python/pyarmnn/examples/common/tests/test_utils.py create mode 100644 python/pyarmnn/examples/common/utils.py (limited to 'python/pyarmnn/examples/common') diff --git a/python/pyarmnn/examples/common/cv_utils.py b/python/pyarmnn/examples/common/cv_utils.py new file mode 100644 index 0000000000..61aa46c3d7 --- /dev/null +++ b/python/pyarmnn/examples/common/cv_utils.py @@ -0,0 +1,184 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT + +""" +This file contains helper functions for reading video/image data and + pre/postprocessing of video/image data using OpenCV. +""" + +import os + +import cv2 +import numpy as np + +import pyarmnn as ann + + +def preprocess(frame: np.ndarray, input_binding_info: tuple): + """ + Takes a frame, resizes, swaps channels and converts data type to match + model input layer. The converted frame is wrapped in a const tensor + and bound to the input tensor. + + Args: + frame: Captured frame from video. + input_binding_info: Contains shape and data type of model input layer. + + Returns: + Input tensor. + """ + # Swap channels and resize frame to model resolution + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + resized_frame = resize_with_aspect_ratio(frame, input_binding_info) + + # Expand dimensions and convert data type to match model input + data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8 + resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0) + assert resized_frame.shape == tuple(input_binding_info[1].GetShape()) + + input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame]) + return input_tensors + + +def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple): + """ + Resizes frame while maintaining aspect ratio, padding any empty space. + + Args: + frame: Captured frame. + input_binding_info: Contains shape of model input layer. + + Returns: + Frame resized to the size of model input layer. 
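+
+    Example (illustrative; assumes a 1280x720 BGR frame and a model input
+    shape of [1, 300, 300, 3], as used by SSD MobileNet):
+        resized = resize_with_aspect_ratio(frame, input_binding_info)
+        # The frame is scaled to 168x300 (rows x cols) and padded with 132
+        # black rows at the bottom, so resized.shape == (300, 300, 3).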
+ """ + aspect_ratio = frame.shape[1] / frame.shape[0] + model_height, model_width = list(input_binding_info[1].GetShape())[1:3] + + if aspect_ratio >= 1.0: + new_height, new_width = int(model_width / aspect_ratio), model_width + b_padding, r_padding = model_height - new_height, 0 + else: + new_height, new_width = model_height, int(model_height * aspect_ratio) + b_padding, r_padding = 0, model_width - new_width + + # Resize and pad any empty space + frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR) + frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding, + borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0]) + return frame + + +def create_video_writer(video: cv2.VideoCapture, video_path: str, output_path: str): + """ + Creates a video writer object to write processed frames to file. + + Args: + video: Video capture object, contains information about data source. + video_path: User-specified video file path. + output_path: Optional path to save the processed video. + + Returns: + Video writer object. + """ + _, ext = os.path.splitext(video_path) + + if output_path is not None: + assert os.path.isdir(output_path) + + i, filename = 0, os.path.join(output_path if output_path is not None else str(), f'object_detection_demo{ext}') + while os.path.exists(filename): + i += 1 + filename = os.path.join(output_path if output_path is not None else str(), f'object_detection_demo({i}){ext}') + + video_writer = cv2.VideoWriter(filename=filename, + fourcc=get_source_encoding_int(video), + fps=int(video.get(cv2.CAP_PROP_FPS)), + frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))) + return video_writer + + +def init_video_file_capture(video_path: str, output_path: str): + """ + Creates a video capture object from a video file. + + Args: + video_path: User-specified video file path. + output_path: Optional path to save the processed video. + + Returns: + Video capture object to capture frames, video writer object to write processed + frames to file, plus total frame count of video source to iterate through. + """ + if not os.path.exists(video_path): + raise FileNotFoundError(f'Video file not found for: {video_path}') + video = cv2.VideoCapture(video_path) + if not video.isOpened: + raise RuntimeError(f'Failed to open video capture from file: {video_path}') + + video_writer = create_video_writer(video, video_path, output_path) + iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))) + return video, video_writer, iter_frame_count + + +def init_video_stream_capture(video_source: int): + """ + Creates a video capture object from a device. + + Args: + video_source: Device index used to read video stream. + + Returns: + Video capture object used to capture frames from a video stream. + """ + video = cv2.VideoCapture(video_source) + if not video.isOpened: + raise RuntimeError(f'Failed to open video capture for device with index: {video_source}') + print('Processing video stream. Press \'Esc\' key to exit the demo.') + return video + + +def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict): + """ + Draws bounding boxes around detected objects and adds a label and confidence score. + + Args: + frame: The original captured frame from video source. + detections: A list of detected objects in the form [class, [box positions], confidence]. + resize_factor: Resizing factor to scale box coordinates to output frame size. 
+ labels: Dictionary of labels and colors keyed on the classification index. + """ + for detection in detections: + class_idx, box, confidence = [d for d in detection] + label, color = labels[class_idx][0].capitalize(), labels[class_idx][1] + + # Obtain frame size and resized bounding box positions + frame_height, frame_width = frame.shape[:2] + x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] + + # Ensure box stays within the frame + x_min, y_min = max(0, x_min), max(0, y_min) + x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) + + # Draw bounding box around detected object + cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) + + # Create label for detected object class + label = f'{label} {confidence * 100:.1f}%' + label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) + + # Make sure label always stays on-screen + x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] + + lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) + lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) + lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) + + # Add label and confidence value + cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) + cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, + label_color, 1, cv2.LINE_AA) + + +def get_source_encoding_int(video_capture): + return int(video_capture.get(cv2.CAP_PROP_FOURCC)) diff --git a/python/pyarmnn/examples/common/network_executor.py b/python/pyarmnn/examples/common/network_executor.py new file mode 100644 index 0000000000..6e2c53c43d --- /dev/null +++ b/python/pyarmnn/examples/common/network_executor.py @@ -0,0 +1,108 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT + +import os +from typing import List, Tuple + +import pyarmnn as ann +import numpy as np + + +def create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()): + """ + Creates a network based on the model file and a list of backends. + + Args: + model_file: User-specified model file. + backends: List of backends to optimize network. + input_names: + output_names: + + Returns: + net_id: Unique ID of the network to run. + runtime: Runtime context for executing inference. + input_binding_info: Contains essential information about the model input. + output_binding_info: Used to map output tensor and its memory. + """ + if not os.path.exists(model_file): + raise FileNotFoundError(f'Model file not found for: {model_file}') + + _, ext = os.path.splitext(model_file) + if ext == '.tflite': + parser = ann.ITfLiteParser() + else: + raise ValueError("Supplied model file type is not supported. 
Supported types are [ tflite ]") + + network = parser.CreateNetworkFromBinaryFile(model_file) + + # Specify backends to optimize network + preferred_backends = [] + for b in backends: + preferred_backends.append(ann.BackendId(b)) + + # Select appropriate device context and optimize the network for that device + options = ann.CreationOptions() + runtime = ann.IRuntime(options) + opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(), + ann.OptimizerOptions()) + print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n' + f'Optimization warnings: {messages}') + + # Load the optimized network onto the Runtime device + net_id, _ = runtime.LoadNetwork(opt_network) + + # Get input and output binding information + graph_id = parser.GetSubgraphCount() - 1 + input_names = parser.GetSubgraphInputTensorNames(graph_id) + input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0]) + output_names = parser.GetSubgraphOutputTensorNames(graph_id) + output_binding_info = [] + for output_name in output_names: + out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name) + output_binding_info.append(out_bind_info) + return net_id, runtime, input_binding_info, output_binding_info + + +def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]: + """ + Executes inference for the loaded network. + + Args: + input_tensors: The input frame tensor. + output_tensors: The output tensor from output node. + runtime: Runtime context for executing inference. + net_id: Unique ID of the network to run. + + Returns: + list: Inference results as a list of ndarrays. + """ + runtime.EnqueueWorkload(net_id, input_tensors, output_tensors) + output = ann.workload_tensors_to_ndarray(output_tensors) + return output + + +class ArmnnNetworkExecutor: + + def __init__(self, model_file: str, backends: list): + """ + Creates an inference executor for a given network and a list of backends. + + Args: + model_file: User-specified model file. + backends: List of backends to optimize network. + """ + self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file, + backends) + self.output_tensors = ann.make_output_tensors(self.output_binding_info) + + def run(self, input_tensors: list) -> List[np.ndarray]: + """ + Executes inference for the loaded network. + + Args: + input_tensors: The input frame tensor. + + Returns: + list: Inference results as a list of ndarrays. + """ + return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id) diff --git a/python/pyarmnn/examples/common/tests/conftest.py b/python/pyarmnn/examples/common/tests/conftest.py new file mode 100644 index 0000000000..5e027a0125 --- /dev/null +++ b/python/pyarmnn/examples/common/tests/conftest.py @@ -0,0 +1,40 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT + +import os +import ntpath + +import urllib.request +import zipfile + +import pytest + +script_dir = os.path.dirname(__file__) +@pytest.fixture(scope="session") +def test_data_folder(request): + """ + This fixture returns path to folder with shared test resources among all tests + """ + + data_dir = os.path.join(script_dir, "testdata") + if not os.path.exists(data_dir): + os.mkdir(data_dir) + + files_to_download = ["https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg", + "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png", + "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi", + "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip" + ] + + for file in files_to_download: + path, filename = ntpath.split(file) + file_path = os.path.join(data_dir, filename) + if not os.path.exists(file_path): + print("\nDownloading test file: " + file_path + "\n") + urllib.request.urlretrieve(file, file_path) + + # Any unzipping needed, and moving around of files + with zipfile.ZipFile(os.path.join(data_dir, "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"), 'r') as zip_ref: + zip_ref.extractall(data_dir) + + return data_dir diff --git a/python/pyarmnn/examples/common/tests/context.py b/python/pyarmnn/examples/common/tests/context.py new file mode 100644 index 0000000000..72246c03bf --- /dev/null +++ b/python/pyarmnn/examples/common/tests/context.py @@ -0,0 +1,7 @@ +import os +import sys +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +import cv_utils +import network_executor +import utils diff --git a/python/pyarmnn/examples/common/tests/test_network_executor.py b/python/pyarmnn/examples/common/tests/test_network_executor.py new file mode 100644 index 0000000000..e27b382078 --- /dev/null +++ b/python/pyarmnn/examples/common/tests/test_network_executor.py @@ -0,0 +1,24 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT + +import os + +import cv2 + +from context import network_executor +from context import cv_utils + + +def test_execute_network(test_data_folder): + model_path = os.path.join(test_data_folder, "detect.tflite") + backends = ["CpuAcc", "CpuRef"] + + executor = network_executor.ArmnnNetworkExecutor(model_path, backends) + img = cv2.imread(os.path.join(test_data_folder, "messi5.jpg")) + input_tensors = cv_utils.preprocess(img, executor.input_binding_info) + + output_result = executor.run(input_tensors) + + # Ensure it detects a person + classes = output_result[1] + assert classes[0][0] == 0 diff --git a/python/pyarmnn/examples/common/tests/test_utils.py b/python/pyarmnn/examples/common/tests/test_utils.py new file mode 100644 index 0000000000..28d68ea235 --- /dev/null +++ b/python/pyarmnn/examples/common/tests/test_utils.py @@ -0,0 +1,19 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT + +import os + +from context import cv_utils +from context import utils + + +def test_get_source_encoding(test_data_folder): + video_file = os.path.join(test_data_folder, "Megamind.avi") + video, video_writer, frame_count = cv_utils.init_video_file_capture(video_file, "/tmp") + assert cv_utils.get_source_encoding_int(video) == 1145656920 + + +def test_read_existing_labels_file(test_data_folder): + label_file = os.path.join(test_data_folder, "labelmap.txt") + labels_map = utils.dict_labels(label_file) + assert labels_map is not None diff --git a/python/pyarmnn/examples/common/utils.py b/python/pyarmnn/examples/common/utils.py new file mode 100644 index 0000000000..cf09fdefb8 --- /dev/null +++ b/python/pyarmnn/examples/common/utils.py @@ -0,0 +1,41 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT + +"""Contains helper functions that can be used across the example apps.""" + +import os +import errno +from pathlib import Path + +import numpy as np + + +def dict_labels(labels_file_path: str, include_rgb=False) -> dict: + """Creates a dictionary of labels from the input labels file. + + Args: + labels_file: Path to file containing labels to map model outputs. + include_rgb: Adds randomly generated RGB values to the values of the + dictionary. Used for plotting bounding boxes of different colours. + + Returns: + Dictionary with classification indices for keys and labels for values. + + Raises: + FileNotFoundError: + Provided `labels_file_path` does not exist. + """ + labels_file = Path(labels_file_path) + if not labels_file.is_file(): + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), labels_file_path + ) + + labels = {} + with open(labels_file, "r") as f: + for idx, line in enumerate(f, 0): + if include_rgb: + labels[idx] = line.strip("\n"), tuple(np.random.random(size=3) * 255) + else: + labels[idx] = line.strip("\n") + return labels -- cgit v1.2.1
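
The new common modules are intended to be composed by the example applications roughly as in the sketch below. The model, video and label-file names are placeholders that mirror the assets downloaded by the test fixture, and the detection post-processing step is model-specific, so this is an illustrative outline rather than a complete demo; it assumes the common directory is on PYTHONPATH, as arranged in tests/context.py.

import cv2

from cv_utils import preprocess, init_video_file_capture
from network_executor import ArmnnNetworkExecutor
from utils import dict_labels

# Placeholder asset paths; any TFLite detection model with a matching label
# file and any readable video can be substituted.
executor = ArmnnNetworkExecutor("detect.tflite", ["CpuAcc", "CpuRef"])
labels = dict_labels("labelmap.txt", include_rgb=True)
video, video_writer, frame_count = init_video_file_capture("sample_video.mp4", "/tmp")

for _ in frame_count:
    frame_present, frame = video.read()
    if not frame_present:
        break
    # Wrap the frame in input tensors that match the model's input layer
    input_tensors = preprocess(frame, executor.input_binding_info)
    # Run inference; returns one ndarray per output binding
    output = executor.run(input_tensors)
    # Decoding `output` into [class, box, confidence] detections and calling
    # draw_bounding_boxes() is model-specific and omitted from this sketch.
    video_writer.write(frame)

video.release()
video_writer.release()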