diff options
-rw-r--r-- | src/core/AccessWindowTranspose.cpp | 4 |
-rw-r--r-- | src/core/CL/cl_kernels/gemm.cl | 6 |
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 |
-rw-r--r-- | src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 2 |
-rw-r--r-- | tests/datasets/FullyConnectedLayerDataset.h | 6 |
5 files changed, 13 insertions, 7 deletions
diff --git a/src/core/AccessWindowTranspose.cpp b/src/core/AccessWindowTranspose.cpp index b1043305b8..4506a0b44c 100644 --- a/src/core/AccessWindowTranspose.cpp +++ b/src/core/AccessWindowTranspose.cpp @@ -201,8 +201,8 @@ bool AccessWindowTranspose::update_padding_if_needed(const Window &window) const PaddingSize padding; padding.left = std::max(0, -min_x); padding.right = std::max<int>(0, max_x - shape[0]); - padding.top = shape.num_dimensions() == 1 ? 0 : std::max(0, -min_y); - padding.bottom = shape.num_dimensions() == 1 ? 0 : std::max<int>(0, max_y - shape[1]); + padding.top = std::max(0, -min_y); + padding.bottom = std::max<int>(0, max_y - shape[1]); // Update strides in tensor info return _info->extend_padding(padding); diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl index 35a2e4704f..7f2a08bc2c 100644 --- a/src/core/CL/cl_kernels/gemm.cl +++ b/src/core/CL/cl_kernels/gemm.cl @@ -1040,7 +1040,7 @@ __kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0), VECTOR_TYPE acc3 = 0.0f; #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(DATA_TYPE)); src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y)) + for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(DATA_TYPE)); src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y)) { // Load values from matrix A VEC_DATA_TYPE(DATA_TYPE, 2) @@ -1344,7 +1344,7 @@ __kernel void gemm_mm_qs16(IMAGE_DECLARATION(src0), #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 // This for loop performs 4 accumulations per iteration - for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(short)); src_addr += (int2)(2 * sizeof(short), 2 * src1_stride_y)) + for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(short)); src_addr += (int2)(2 * sizeof(short), 2 * src1_stride_y)) { short2 a0 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y)); #if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 @@ -1629,7 +1629,7 @@ __kernel void 
gemm_lc_vm_f32(IMAGE_DECLARATION(src0), float4 acc = 0.0f; - for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(float)); src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y)) + for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float)); src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y)) { float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0)); float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1)); diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index ee1558fe71..03d5dbdfd1 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -85,7 +85,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2); _are_weights_reshaped = transpose_weights ? 
are_weights_reshaped : true; _is_fc_after_conv = true; diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 2e8d10598d..fc04e28972 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -133,7 +133,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh const int num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions; const size_t linear_input_size = input->info()->tensor_shape().total_size_lower(num_input_dimensions); - _linearize_input = input->info()->tensor_shape().x() != linear_input_size; + _linearize_input = (input->info()->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1); _are_weights_reshaped = are_weights_reshaped; _accumulate_biases = biases != nullptr; _is_batched_fc_layer = num_batch_dimensions > 0; diff --git a/tests/datasets/FullyConnectedLayerDataset.h b/tests/datasets/FullyConnectedLayerDataset.h index 5789954e85..9f8089d81a 100644 --- a/tests/datasets/FullyConnectedLayerDataset.h +++ b/tests/datasets/FullyConnectedLayerDataset.h @@ -120,10 +120,16 @@ public: SmallFullyConnectedLayerDataset() { // Conv -> FC + add_config(TensorShape(1U, 1U, 1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U)); + // Conv -> FC add_config(TensorShape(9U, 5U, 7U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U)); // Conv -> FC (batched) add_config(TensorShape(9U, 5U, 7U, 3U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U, 3U)); // FC -> FC + add_config(TensorShape(1U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U)); + // FC -> FC (batched) + add_config(TensorShape(1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U)); + // FC -> FC add_config(TensorShape(201U), TensorShape(201U, 529U), TensorShape(529U), TensorShape(529U)); // FC -> FC 
(batched) add_config(TensorShape(201U, 3U), TensorShape(201U, 529U), TensorShape(529U), TensorShape(529U, 3U));