//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2022 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

=== Data Layout

==== CONCAT
Concatenate a list of tensors along a given axis.
No data conversion happens during a concat operation.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shapes1[]|List of input tensors. All inputs must have the same rank and data type
|Attribute|int32_t|axis|-|Axis along which concatenation is to occur, in range from 0 to rank(shape)-1
|Output|in_out_t*|output|shape|Output tensor
|===

*Operation Function:*

[source,c++]
----
ERROR_IF(axis < 0 || axis >= rank(shapes1[0]));
ERROR_IF(shape[axis] != sum(shapes1[k][axis] for all k));
// The following checks ensure all inputs are compatible for concatenation
for_each(input_shape in shapes1) {
    ERROR_IF(rank(input_shape) != rank(shapes1[0]));
    for_each(index in input_shape) {
        ERROR_IF(input_shape[index] != shapes1[0][index] && index != axis);
    }
}
for_each(index1 in shape) {
    index2 = index1;
    for (int32_t t = 0; t < length(input1); t++) {
        // Continue to concatenate along axis from each tensor
        // For each output location, we are looking for the
        // appropriate input tensor
        if (index2[axis] >= 0 && index2[axis] < shapes1[t][axis]) {
            in_out_t value = tensor_read<in_out_t>(input1[t], shapes1[t], index2);
            tensor_write<in_out_t>(output, shape, index1, value);
        }
        index2[axis] = index2[axis] - shapes1[t][axis];
    }
}

----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== PAD

Pads a tensor along the borders of each dimension with a supplied value.
Returns a new tensor with the padding included.
The pad_const value includes the zero point if the tensor uses a zero point.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shape1|Input tensor
|Attribute|int32_t|padding|[rank(shape1),2]|Number of pad elements at the start and end of each dimension
|Attribute|in_out_t|pad_const|-|Constant value to be used as padding
|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor
|===

*Operation Function:*

[source,c++]
----
// Check output shape matches the padded input shape
ERROR_IF(rank(shape) != rank(shape1));
for (i = 0; i < rank(shape); i++) {
    ERROR_IF(padding[i,0] < 0 || padding[i,1] < 0);
    ERROR_IF(shape[i] != padding[i, 0] + shape1[i] + padding[i, 1]);
}
for_each(index in shape) {
    index1 = index;
    bool_t is_pad = false;
    for(i = 0; i < rank(shape); i++) {
        index1[i] = index1[i] - padding[i,0];
        if (index1[i] < 0 || index1[i] >= shape1[i]) {
            is_pad = true;
        }
    }
    in_out_t value = is_pad ? pad_const : tensor_read<in_out_t>(input1, shape1, index1);
    tensor_write<in_out_t>(output, shape, index, value);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== RESHAPE

Returns a tensor with the same type/values as the input, with a new shape specified by the shape argument. Reshape may operate on tensors of any rank. No data conversion happens during a reshape operation.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shape1|Input tensor
|Attribute|int32_t|new_shape|[rank(shape)]|List of values, with each element giving the size of the result tensor for the given dimension.
|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===

*Operation Function:*

[source,c++]
----
ERROR_IF(tensor_size(shape1) != tensor_size(shape));

for_each(index in shape) {
    // Calculate flattened index for the output location (index)
    size_t offset = tensor_index_to_offset(shape, index);
    // Now convert to the location in the input
    dim_t tmp_index = tensor_offset_to_index(shape1, offset);

    // Now read/write the value
    in_out_t val = tensor_read<in_out_t>(input1, shape1, tmp_index);
    tensor_write<in_out_t>(output, shape, index, val);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== REVERSE

Returns a tensor with the same type/values as the input, with the data reversed along the given axis. No data conversion happens during a reverse operation.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input|shape|Input tensor from 1 to 4 dims
|Attribute|int32_t|axis|-|Axis to reverse, in range from 0 to rank(shape)-1
|Output|in_out_t*|output|shape|Output tensor. Same shape as input tensor.
|===

*Operation Function:*

[source,c++]
----
ERROR_IF(axis < 0 || axis >= rank(shape));
for_each(index in shape) {
    tmp_index = index;
    tmp_index[axis] = shape[axis] - 1 - index[axis];
    in_out_t value = tensor_read<in_out_t>(input, shape, tmp_index);
    tensor_write<in_out_t>(output, shape, index, value);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== SLICE

Extracts a slice of input1, beginning at the start coordinates and extending for size elements in each dimension.
No data conversion happens during a slice operation.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|start|[rank(input1)]|List of integer coordinates, of length equal to the rank of input1. Start coordinate for slicing.
|Attribute|int32_t|size|[rank(input1)]|List of integer size values, of length equal to the rank of input1. Size of the input to be used.
|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor
|===

*Operation Function:*

[source,c++]
----
ERROR_IF(rank(input1) != length(start) || rank(input1) != length(size));
ERROR_IF(rank(input1) != rank(output));
// Sanity check the given coordinates, ensure start and end are
// within tensor bounds
for_each(index in rank(input1)) {
    ERROR_IF(start[index] < 0);
    ERROR_IF(size[index] <= 0); //Output must be positive size
    ERROR_IF(start[index] + size[index] > shape1[index]);
    ERROR_IF(shape[index] != size[index]);
}

for_each(index in shape) {
    tmp_index = index;
    for(i = 0; i < rank(shape); i++) {
       tmp_index[i] = index[i] + start[i];
    }
    in_out_t value = tensor_read<in_out_t>(input1, shape1, tmp_index);
    tensor_write<in_out_t>(output, shape, index, value);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== TILE

Replicates input1 along each dimension, repeating it multiples[i] times in dimension i.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|multiples|[rank(shape1)]|Number of times to replicate input1 in each dimension
|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor
|===

*Operation Function:*

[source,c++]
----
for_each(index in shape) {
    tmp_index = index;
    for(i = 0; i < rank(shape); i++) {
        ERROR_IF(shape1[i] * multiples[i] != shape[i]);
        tmp_index[i] = index[i] % shape1[i];
    }
    in_out_t value = tensor_read<in_out_t>(input1, shape1, tmp_index);
    tensor_write<in_out_t>(output, shape, index, value);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===

==== TRANSPOSE

Permutes the dimensions of the input tensor input1 based on the perms argument.
Each value in the perms list must be a valid dimension of the input tensor and may not be repeated.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_out_t*|input1|shape1|Input tensor with minimum rank of one.
|Attribute|int32_t|perms|[rank(input1)]|List of integers of length equal to the rank of input1. Values must be valid dimensions within shape1, and may not be repeated.
|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor
|===

*Operation Function:*

[source,c++]
----
for_each(index in perms) {
    // Ensure each perms value is a valid value
    ERROR_IF(index >= rank(shape1));
    ERROR_IF(index < 0);
    // Ensure dimensions aren't repeated
    ERROR_IF(indexes_used[index] == true);
    indexes_used[index] = true;
}

// Ensure that the output shapes have the properly
// permuted shapes
for(i = 0; i < rank(shape); i++) {
    ERROR_IF(shape1[perms[i]] != shape[i]);
}

for_each(index in shape) {
    tmp_index = index;
    for(i = 0; i < rank(shape); i++) {
        tmp_index[perms[i]] = index[i];
    }
    in_out_t value = tensor_read<in_out_t>(input1, shape1, tmp_index);
    tensor_write<in_out_t>(output, shape, index, value);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_out_t

|Any|Boolean|bool_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|MI, MT|fp16|fp16_t
|MI, MT|bf16|bf16_t
|MI, MT|fp32|fp32_t
|===