diff options
Diffstat (limited to 'source/hal/source/components/npu/ethosu_cpu_cache.c')
-rw-r--r-- | source/hal/source/components/npu/ethosu_cpu_cache.c | 94 |
1 files changed, 82 insertions, 12 deletions
diff --git a/source/hal/source/components/npu/ethosu_cpu_cache.c b/source/hal/source/components/npu/ethosu_cpu_cache.c index 0840971..d5f5e47 100644 --- a/source/hal/source/components/npu/ethosu_cpu_cache.c +++ b/source/hal/source/components/npu/ethosu_cpu_cache.c @@ -21,34 +21,104 @@ #include "ethosu_driver.h" /* Arm Ethos-U driver header */ #include "log_macros.h" /* Logging macros */ +/** Structure to maintain data cache states. */ +typedef struct _cpu_cache_state { + uint32_t dcache_invalidated : 1; + uint32_t dcache_cleaned : 1; +} cpu_cache_state; + +/** Static CPU cache state object. + * @note The logic around flipping these states is based on the driver + * calling the functions in this sequence: + * + * Cache flush (ethosu_flush_dcache) + * ↓ + * Start inference (ethosu_inference_begin) + * ↓ + * Inference (ethosu_dev_run_command_stream) + * ↓ + * End inference (ethosu_inference_end) + * ↓ + * Cache invalidate (ethosu_invalidate_dcache) + **/ +static cpu_cache_state s_cache_state = {.dcache_cleaned = 0, .dcache_invalidated = 0}; + +/** + * @brief Gets the current CPU cache state. + * @return Pointer to the CPU cache state object. + */ +static cpu_cache_state* ethosu_get_cpu_cache_state(void) +{ + return &s_cache_state; +} + +void ethosu_clear_cache_states(void) +{ + cpu_cache_state* const state = ethosu_get_cpu_cache_state(); + trace("Clearing cache state members\n"); + state->dcache_invalidated = 0; + state->dcache_cleaned = 0; +} + void ethosu_flush_dcache(uint32_t *p, size_t bytes) { + UNUSED(p); + UNUSED(bytes); #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + cpu_cache_state* const state = ethosu_get_cpu_cache_state(); if (SCB->CCR & SCB_CCR_DC_Msk) { - if (p) { - SCB_CleanDCache_by_Addr((void *) p, (int32_t) bytes); - } else { + + /** + * @note We could choose to call the `SCB_CleanDCache_by_Addr` function + * here, but the sizes which this function is called for, can + * cause unnecessary delays. 
It's worth noting that this function + * is called from the Arm Ethos-U NPU driver repeatedly for each + * region it accesses. This could even be RO memory which does + * not need cache maintenance, along with parts of the input and + * output tensors which rightly need to be cleaned. Therefore, to + * reduce overhead of repeated calls for large memory sizes, we + * call the clean and invalidation functions for the whole cache. + * + * If the neural network to be executed is completely falling + * onto the NPU, consider disabling the data cache altogether + * for the duration of the inference to further reduce the cache + * maintenance burden in these functions. + */ + + /** Clean the cache if it hasn't been cleaned already */ + if (!state->dcache_cleaned) { + trace("Cleaning data cache\n"); SCB_CleanDCache(); + + /** Assert the cache cleaned state and clear the invalidation + * state. */ + state->dcache_cleaned = 1; + state->dcache_invalidated = 0; } } -#else - UNUSED(p); - UNUSED(bytes); #endif /* defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) */ } void ethosu_invalidate_dcache(uint32_t *p, size_t bytes) { + UNUSED(p); + UNUSED(bytes); #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + cpu_cache_state* const state = ethosu_get_cpu_cache_state(); if (SCB->CCR & SCB_CCR_DC_Msk) { - if (p) { - SCB_InvalidateDCache_by_Addr((void *) p, (int32_t) bytes); - } else { + /** + * See note in ethosu_flush_dcache function for why we invalidate the whole + * cache instead of calling it for specific addresses. + **/ + if (!state->dcache_invalidated) { + trace("Invalidating data cache\n"); SCB_InvalidateDCache(); + + /** Assert the cache invalidation state and clear the clean + * state. */ + state->dcache_invalidated = 1; + state->dcache_cleaned = 0; } } -#else - UNUSED(p); - UNUSED(bytes); #endif /* defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) */ } |