/* * Copyright (c) 2019-2022 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the License); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an AS IS BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /****************************************************************************** * Includes ******************************************************************************/ #include "ethosu_driver.h" #include "ethosu_config.h" #include "ethosu_device.h" #include "ethosu_log.h" #include #include #include #include #include #include #include /****************************************************************************** * Defines ******************************************************************************/ #define UNUSED(x) ((void)x) #define BYTES_IN_32_BITS 4 #define MASK_16_BYTE_ALIGN (0xF) #define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2 #define DRIVER_ACTION_LENGTH_32_BIT_WORD 1 #define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1" #define FAST_MEMORY_BASE_ADDR_INDEX 2 /****************************************************************************** * Types ******************************************************************************/ // Driver actions enum DRIVER_ACTION_e { RESERVED = 0, OPTIMIZER_CONFIG = 1, COMMAND_STREAM = 2, NOP = 5, }; // Custom operator payload data struct struct cop_data_s { union { // Driver action data struct { uint8_t driver_action_command; // (valid values in DRIVER_ACTION_e) uint8_t reserved; // Driver action data union { // DA_CMD_OPT_CFG struct { uint16_t rel_nbr : 4; uint16_t patch_nbr : 4; uint16_t opt_cfg_reserved : 8; }; // DA_CMD_CMSTRM struct { uint16_t length; }; uint16_t driver_action_data; }; }; uint32_t word; }; }; // optimizer config struct struct opt_cfg_s { struct cop_data_s da_data; uint32_t cfg; uint32_t id; }; /****************************************************************************** * Variables ******************************************************************************/ // Registered drivers linked list HEAD static struct ethosu_driver *registered_drivers = NULL; /****************************************************************************** * Weak functions - Cache * * Default NOP operations. Override if available on the targeted device. ******************************************************************************/ /* * Flush/clean the data cache by address and size. Passing NULL as p argument * expects the whole cache to be flushed. */ void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes) { UNUSED(p); UNUSED(bytes); } /* * Invalidate the data cache by address and size. Passing NULL as p argument * expects the whole cache to be invalidated. */ void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes) { UNUSED(p); UNUSED(bytes); } /****************************************************************************** * Weak functions - Semaphore/Mutex for multi NPU * * Following section handles the minimal sempahore and mutex implementation in * case of baremetal applications. Weak symbols will be overridden by RTOS * definitions and implement true thread-safety (in application layer). ******************************************************************************/ struct ethosu_semaphore_t { uint8_t count; }; static void *ethosu_mutex; static void *ethosu_semaphore; void *__attribute__((weak)) ethosu_mutex_create(void) { return NULL; } void __attribute__((weak)) ethosu_mutex_destroy(void *mutex) { UNUSED(mutex); } int __attribute__((weak)) ethosu_mutex_lock(void *mutex) { UNUSED(mutex); return 0; } int __attribute__((weak)) ethosu_mutex_unlock(void *mutex) { UNUSED(mutex); return 0; } // Baremetal implementation of creating a semaphore void *__attribute__((weak)) ethosu_semaphore_create(void) { struct ethosu_semaphore_t *sem = malloc(sizeof(*sem)); sem->count = 0; return sem; } void __attribute__((weak)) ethosu_semaphore_destroy(void *sem) { free((struct ethosu_semaphore_t *)sem); } // Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrisics int __attribute__((weak)) ethosu_semaphore_take(void *sem) { struct ethosu_semaphore_t *s = sem; while (s->count == 0) { __WFE(); } s->count = 0; return 0; } // Baremetal simulation of giving a semaphore and waking up processes using intrinsics int __attribute__((weak)) ethosu_semaphore_give(void *sem) { struct ethosu_semaphore_t *s = sem; s->count = 1; __SEV(); return 0; } /****************************************************************************** * Weak functions - Inference begin/end callbacks ******************************************************************************/ void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg) { UNUSED(user_arg); UNUSED(drv); } void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, void *user_arg) { UNUSED(user_arg); UNUSED(drv); } /****************************************************************************** * Static functions ******************************************************************************/ static void ethosu_register_driver(struct ethosu_driver *drv) { // Register driver as new HEAD of list drv->next = registered_drivers; registered_drivers = drv; LOG_INFO("New NPU driver registered (handle: 0x%p, NPU: 0x%p)", drv, drv->dev->reg); } static int ethosu_deregister_driver(struct ethosu_driver *drv) { struct ethosu_driver *cur = registered_drivers; struct ethosu_driver **prev = ®istered_drivers; while (cur != NULL) { if (cur == drv) { *prev = cur->next; LOG_INFO("NPU driver handle %p deregistered.", drv); return 0; } prev = &cur->next; cur = cur->next; } LOG_ERR("No NPU driver handle registered at address %p.", drv); return -1; } static struct ethosu_driver *ethosu_find_and_reserve_driver(void) { struct ethosu_driver *drv = registered_drivers; while (drv != NULL) { if (!drv->reserved) { drv->reserved = true; LOG_DEBUG("NPU driver handle %p reserved.", drv); return drv; } drv = drv->next; } LOG_WARN("No NPU driver handle available."); return NULL; } static void ethosu_reset_job(struct ethosu_driver *drv) { memset(&drv->job, 0, sizeof(struct ethosu_job)); } static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p) { LOG_INFO("Optimizer release nbr: %d patch: %d", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr); if (ethosu_dev_verify_optimizer_config(drv->dev, opt_cfg_p->cfg, opt_cfg_p->id) != true) { return -1; } return 0; } static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length) { uint32_t cms_bytes = cms_length * BYTES_IN_32_BITS; ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream; LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d", cmd_stream, cms_length); if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN)) { LOG_ERR("Command stream addr %p not aligned to 16 bytes", cmd_stream); return -1; } // Verify 16 byte alignment for base address' for (int i = 0; i < drv->job.num_base_addr; i++) { if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN)) { LOG_ERR("Base addr %d: 0x%llx not aligned to 16 bytes", i, drv->job.base_addr[i]); return -1; } } drv->job.state = ETHOSU_JOB_RUNNING; // Flush the cache if available on CPU. // The upcasting to uin32_t* is ok since the pointer never is dereferenced. // The base_addr_size is null if invoking from prior to invoke_V2, in that case // the whole cache is being flushed. if (drv->job.base_addr_size != NULL) { ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes); for (int i = 0; i < drv->job.num_base_addr; i++) { ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]); } } else { ethosu_flush_dcache(NULL, 0); } // Request power gating disabled during inference run if (!drv->dev_power_always_on) { // Will soft reset if security state or privilege level needs changing. // Also note that any configurations done in the NPU prior to this point // are lost in case power gating has been in effect. set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE); // Make sure AXI settings are applied ethosu_dev_axi_init(drv->dev); } // Inference begin callback ethosu_inference_begin(drv, drv->job.user_arg); // Execute the command stream ethosu_dev_run_command_stream(drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr); return 0; } /****************************************************************************** * Weak functions - Interrupt handler ******************************************************************************/ void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv) { LOG_DEBUG("Got interrupt from Ethos-U"); drv->job.state = ETHOSU_JOB_DONE; if (!ethosu_dev_handle_interrupt(drv->dev)) { drv->status_error = true; } /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_semaphore_give(drv->semaphore); } /****************************************************************************** * Functions API ******************************************************************************/ int ethosu_init(struct ethosu_driver *drv, const void *base_address, const void *fast_memory, const size_t fast_memory_size, uint32_t secure_enable, uint32_t privilege_enable) { LOG_INFO("Initializing NPU: base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32 ", privileged=%" PRIu32, base_address, fast_memory, fast_memory_size, secure_enable, privilege_enable); if (!ethosu_mutex) { ethosu_mutex = ethosu_mutex_create(); } if (!ethosu_semaphore) { ethosu_semaphore = ethosu_semaphore_create(); } drv->fast_memory = (uint32_t)fast_memory; drv->fast_memory_size = fast_memory_size; // Initialize the device and set requested security state and privilege mode drv->dev = ethosu_dev_init(base_address, secure_enable, privilege_enable); if (drv->dev == NULL) { LOG_ERR("Failed to initialize Ethos-U device"); return -1; } // Power always ON requested if (drv->dev_power_always_on) { if (set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE) != ETHOSU_SUCCESS) { LOG_ERR("Failed to disable power-q for Ethos-U"); return -1; } } drv->semaphore = ethosu_semaphore_create(); drv->status_error = false; ethosu_register_driver(drv); return 0; } void ethosu_deinit(struct ethosu_driver *drv) { ethosu_deregister_driver(drv); ethosu_semaphore_destroy(drv->semaphore); ethosu_dev_deinit(drv->dev); drv->dev = NULL; } void ethosu_get_driver_version(struct ethosu_driver_version *ver) { assert(ver != NULL); ver->major = ETHOSU_DRIVER_VERSION_MAJOR; ver->minor = ETHOSU_DRIVER_VERSION_MINOR; ver->patch = ETHOSU_DRIVER_VERSION_PATCH; } void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw) { assert(hw != NULL); ethosu_dev_get_hw_info(drv->dev, hw); } int ethosu_wait(struct ethosu_driver *drv, bool block) { int ret = 0; switch (drv->job.state) { case ETHOSU_JOB_IDLE: LOG_ERR("Inference job not running..."); ret = -2; break; case ETHOSU_JOB_RUNNING: if (!block) { // Inference still running, do not block ret = 1; break; } // fall through case ETHOSU_JOB_DONE: // Wait for interrupt in blocking mode. In non-blocking mode // the interrupt has already triggered /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_semaphore_take(drv->semaphore); // Inference done callback ethosu_inference_end(drv, drv->job.user_arg); // Check NPU and interrupt status if (drv->status_error) { LOG_ERR("NPU error(s) occured during inference."); ethosu_dev_print_err_status(drv->dev); // Reset the NPU (void)ethosu_dev_soft_reset(drv->dev); // NPU is no longer in error state drv->status_error = false; ret = -1; } // Clear the clock/power gating disable request if (!drv->dev_power_always_on) { // NOTE: Other requesters (like PMU) can be active, keeping // clock/power gating disabled until no requests remain. set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE); } if (ret == 0) { // Invalidate cache if (drv->job.base_addr_size != NULL) { for (int i = 0; i < drv->job.num_base_addr; i++) { ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]); } } else { ethosu_invalidate_dcache(NULL, 0); } LOG_DEBUG("Inference finished successfully..."); } // Reset internal job (state resets to IDLE) ethosu_reset_job(drv); break; default: LOG_ERR("Unexpected job state"); ethosu_reset_job(drv); ret = -1; break; } // Return inference job status return ret; } int ethosu_invoke_async(struct ethosu_driver *drv, const void *custom_data_ptr, const int custom_data_size, const uint64_t *base_addr, const size_t *base_addr_size, const int num_base_addr, void *user_arg) { const struct cop_data_s *data_ptr = custom_data_ptr; const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size); // Make sure an inference is not already running if (drv->job.state != ETHOSU_JOB_IDLE) { LOG_ERR("Inference already running, or waiting to be cleared..."); return -1; } drv->job.state = ETHOSU_JOB_IDLE; drv->job.custom_data_ptr = custom_data_ptr; drv->job.custom_data_size = custom_data_size; drv->job.base_addr = base_addr; drv->job.base_addr_size = base_addr_size; drv->job.num_base_addr = num_base_addr; drv->job.user_arg = user_arg; // First word in custom_data_ptr should contain "Custom Operator Payload 1" if (data_ptr->word != ETHOSU_FOURCC) { LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC); goto err; } // Custom data length must be a multiple of 32 bits if ((custom_data_size % BYTES_IN_32_BITS) != 0) { LOG_ERR("custom_data_size=0x%x not a multiple of 4", custom_data_size); goto err; } data_ptr++; // Adjust base address to fast memory area if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX) { uint64_t *fast_memory = (uint64_t *)&base_addr[FAST_MEMORY_BASE_ADDR_INDEX]; if (base_addr_size != NULL && base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size) { LOG_ERR("Fast memory area too small. fast_memory_size=%u, base_addr_size=%u", drv->fast_memory_size, base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]); goto err; } *fast_memory = drv->fast_memory; } drv->status_error = false; // Parse Custom Operator Payload data while (data_ptr < data_end) { switch (data_ptr->driver_action_command) { case OPTIMIZER_CONFIG: LOG_DEBUG("OPTIMIZER_CONFIG"); struct opt_cfg_s *opt_cfg_p = (struct opt_cfg_s *)data_ptr; if (handle_optimizer_config(drv, opt_cfg_p) < 0) { goto err; } data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD; break; case COMMAND_STREAM: // Vela only supports putting one COMMAND_STREAM per op LOG_DEBUG("COMMAND_STREAM"); void *command_stream = (uint8_t *)(data_ptr) + sizeof(struct cop_data_s); int cms_length = (data_ptr->reserved << 16) | data_ptr->length; if (handle_command_stream(drv, command_stream, cms_length) < 0) { goto err; } data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length; break; case NOP: LOG_DEBUG("NOP"); data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD; break; default: LOG_ERR("UNSUPPORTED driver_action_command: %d", data_ptr->driver_action_command); goto err; break; } } return 0; err: LOG_ERR("Failed to invoke inference."); ethosu_reset_job(drv); return -1; } int ethosu_invoke_v3(struct ethosu_driver *drv, const void *custom_data_ptr, const int custom_data_size, const uint64_t *base_addr, const size_t *base_addr_size, const int num_base_addr, void *user_arg) { if (ethosu_invoke_async( drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0) { return -1; } return ethosu_wait(drv, true); } void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on) { drv->dev_power_always_on = always_on; if (always_on) { if (ethosu_dev_verify_access_state(drv->dev) == false) { // Reset to enter correct security state/privilege mode if (ethosu_dev_soft_reset(drv->dev) == false) { LOG_ERR("Failed to set power mode for Ethos-U"); return; } } ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_UNCHANGED, ETHOSU_POWER_Q_DISABLE); ethosu_dev_axi_init(drv->dev); } else { ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_UNCHANGED, ETHOSU_POWER_Q_ENABLE); } } struct ethosu_driver *ethosu_reserve_driver(void) { struct ethosu_driver *drv = NULL; do { /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_mutex_lock(ethosu_mutex); drv = ethosu_find_and_reserve_driver(); /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_mutex_unlock(ethosu_mutex); if (drv != NULL) { break; } LOG_INFO("Waiting for NPU driver handle to become available..."); /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_semaphore_take(ethosu_semaphore); } while (1); return drv; } void ethosu_release_driver(struct ethosu_driver *drv) { /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_mutex_lock(ethosu_mutex); if (drv != NULL && drv->reserved) { if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE) { // Give the inference one shot to complete or force kill the job if (ethosu_wait(drv, false) == 1) { // Still running, soft reset the NPU and reset driver ethosu_dev_soft_reset(drv->dev); ethosu_reset_job(drv); drv->status_error = false; /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_semaphore_give(drv->semaphore); (void)set_clock_and_power_request( drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE); } } drv->reserved = false; LOG_DEBUG("NPU driver handle %p released", drv); /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_semaphore_give(ethosu_semaphore); } /* TODO: feedback needed aout how to handle error (-1) return value */ ethosu_mutex_unlock(ethosu_mutex); } enum ethosu_error_codes set_clock_and_power_request(struct ethosu_driver *drv, enum ethosu_request_clients client, enum ethosu_clock_q_request clock_request, enum ethosu_power_q_request power_request) { // Keep track of which client requests clock gating to be disabled if (clock_request == ETHOSU_CLOCK_Q_DISABLE) { drv->clock_request |= (1 << client); } else if (clock_request == ETHOSU_CLOCK_Q_ENABLE) // Remove client from bitmask { drv->clock_request &= ~(1 << client); } // Only enable clock gating when no client has asked for it to be disabled clock_request = drv->clock_request == 0 ? ETHOSU_CLOCK_Q_ENABLE : ETHOSU_CLOCK_Q_DISABLE; // Keep track of which client requests power gating to be disabled if (power_request == ETHOSU_POWER_Q_DISABLE) { drv->power_request |= (1 << client); } else if (power_request == ETHOSU_POWER_Q_ENABLE) { drv->power_request &= ~(1 << client); } // Override if power has been requested to be always on if (drv->dev_power_always_on == true) { power_request = ETHOSU_POWER_Q_DISABLE; } else { // Only enable power gating when no client has asked for it to be disabled power_request = drv->power_request == 0 ? ETHOSU_POWER_Q_ENABLE : ETHOSU_POWER_Q_DISABLE; } // Verify security state and privilege mode if power is requested to be on if (power_request == ETHOSU_POWER_Q_DISABLE) { if (ethosu_dev_verify_access_state(drv->dev) == false) { if (ethosu_dev_soft_reset(drv->dev) != ETHOSU_SUCCESS) { LOG_ERR("Failed to set clock and power q channels for Ethos-U"); return ETHOSU_GENERIC_FAILURE; } } } // Set clock and power return ethosu_dev_set_clock_and_power(drv->dev, clock_request, power_request); }