#include <aie_api/aie_adf.hpp>
#include "kernel.hpp"
/**
 * Second-order-section kernel, written with low-level vector intrinsics.
 *
 * Each pass of the outer loop consumes one 8-float vector from `idata` and
 * produces one 8-float vector on `odata`; the loop runs `burst_cnt` times
 * (`burst_cnt` is defined outside this function - presumably in kernel.hpp).
 *
 * A function-local static 8-lane register carries state from one pass to the
 * next and from one kernel invocation to the next. It is zero-initialised.
 * Because it is a static inside a function template, every distinct `id`
 * gets its own copy; `id` is not otherwise referenced in the body.
 *
 * @tparam id   instance selector (only distinguishes template instantiations)
 * @param idata input buffer, read as vectors of 8 floats
 * @param odata output buffer, written as vectors of 8 floats
 * @param C_e   48 coefficients, viewed here as 6 vectors of 8 floats (even columns)
 * @param C_o   48 coefficients, viewed here as 6 vectors of 8 floats (odd columns)
 */
template<unsigned id>
void SecondOrderSection(
    adf::input_buffer<float> & __restrict idata,    // 8 input samples per iteration
    adf::output_buffer<float> & __restrict odata,   // 8 output samples per iteration
    const float (&C_e)[48],                         // run-time parameter: SIMD matrix of coefficients (even columns)
    const float (&C_o)[48]                          // run-time parameter: SIMD matrix of coefficients (odd columns)
) {
    // Persistent filter state (lanes 0-7 of the 16-lane working register).
    static v8float history = null_v8float();

    // Vector-wise cursors over the input and output buffers.
    auto src = aie::begin_vector<8>(idata);
    auto dst = aie::begin_vector<8>(odata);

    // View each coefficient array as six 8-lane column vectors.
    const v8float *even_cols = reinterpret_cast<const v8float *>(&C_e[0]);
    const v8float *odd_cols = reinterpret_cast<const v8float *>(&C_o[0]);

    for (int burst = 0; burst < burst_cnt; ++burst) {
        // Newest 8 samples occupy lanes 8-15; carried state occupies lanes 0-7.
        v8float fresh = *src++;
        v16float window = concat(history, fresh);

        // Two independent accumulators: one for even columns, one for odd.
        v8float sum_even = null_v8float();
        v8float sum_odd = null_v8float();

        // Columns 0..5 of each matrix pair with window lanes 4..15:
        // even column `col` uses lane 2*col+4, odd column `col` uses lane 2*col+5.
        // NOTE(review): the x-offset word 0 with z-offsets 0x76543210 presumably
        // multiplies one window lane by a whole coefficient column - confirm
        // against the fpmac intrinsic reference.
        for (int col = 0; col < 6; ++col)
            chess_flatten_loop
        {
            sum_even = fpmac(sum_even, window, (2 * col + 4), 0, even_cols[col], 0, 0x76543210);
            sum_odd = fpmac(sum_odd, window, (2 * col + 5), 0, odd_cols[col], 0, 0x76543210);
        }

        // Combine both halves and emit the 8 output samples.
        v8float result = fpadd(sum_odd, sum_even);
        *dst++ = result;

        // Next state: the input vector just consumed, with lanes 4 and 5
        // overwritten by the last two outputs (result lanes 6 and 7).
        history = upd_elem(upd_elem(fresh, 4, ext_elem(result, 6)), 5, ext_elem(result, 7));
    }
} // end SecondOrderSection()
Note: The inner loop uses the `chess_flatten_loop` pragma. This pragma unrolls the loop completely, eliminating the loop construct. Documentation on compiler pragmas can be found in the AI Engine Lounge.
In the code provided, selecting between API and LLI is performed by defining or commenting out `USE_API` on line 17 of `kernel.hpp`.
The generated assembly code is as follows: