aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp')
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp27
1 files changed, 5 insertions, 22 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp
index adcbedf4ce..64832a38a9 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_planar.hpp
@@ -210,30 +210,13 @@ struct OutputRowPtrsElement
T *output_padding_buffer;
};
- // On some implementations there is a significant performance benefit to
- // aligning the padding buffer to a 1024 byte boundary. This routine
- // adds as much padding as needed to an arbitrary input pointer and
- // returns an aligned void *.
- static constexpr intptr_t BUFFER_ALIGNMENT=1024;
-
- template <typename ptr_T>
- static void *do_align(ptr_T in)
- {
- intptr_t v = reinterpret_cast<intptr_t>(in);
- intptr_t odds = v & (BUFFER_ALIGNMENT-1);
- intptr_t pad = odds ? (BUFFER_ALIGNMENT - odds) : 0;
-
- return reinterpret_cast<void *>(v + pad);
- }
-
template <typename OutputStage>
static size_t get_element_size(const WorkspaceArgs<IPlanarStrategy<OutputStage>, OutputStage> &args)
{
- // We need one pointer and stride for each row of output, and an
- // additional blob of memory into which padded stores can go. Allow
- // extra space so that this padding buffer can be aligned at both ends.
+ // We need one pointer and stride for each row of output, and an additional
+ // blob of memory into which padded stores can go.
return args.strategy->get_output_rows() * (sizeof(T *) + 2*sizeof(size_t)) +
- get_vector_length<char>(args.strategy->get_vl_type()) + BUFFER_ALIGNMENT*2;
+ get_vector_length<char>(args.strategy->get_vl_type());
}
template <typename WorkspaceType, typename OutputStage>
@@ -244,8 +227,8 @@ struct OutputRowPtrsElement
ws->output_row_ptrs = reinterpret_cast<T **>(buffer);
ws->output_ld_cols = reinterpret_cast<size_t *>(ws->output_row_ptrs + n_rows);
ws->output_ld_vls = ws->output_ld_cols + n_rows;
- ws->output_padding_buffer = reinterpret_cast<T *>(do_align(ws->output_ld_vls + n_rows));
- return do_align(ws->output_padding_buffer + get_vector_length<T>(args.strategy->get_vl_type()));
+ ws->output_padding_buffer = reinterpret_cast<T *>(ws->output_ld_vls + n_rows);
+ return ws->output_padding_buffer + get_vector_length<T>(args.strategy->get_vl_type());
}
};