自动重启内核会一直运行直至复位。如果内核包含串流接口,那么主机代码应该能够使用软件复位功能停止内核。要定义此行为,必须使用全局配置选项在 Vitis HLS 中配置内核,如下所示:
config_interface -s_axilite_auto_restart_counter 1
config_interface -s_axilite_sw_reset
自动重启内核的内核代码类似于以下示例:
#include "ap_axi_sdata.h"
#include "hls_stream.h"
typedef ap_axis<32, 0, 0, 0> pkt;
extern "C" {
10 void krnl_stream_vdatamover(hls::stream<pkt> &in,
11 hls::stream<pkt> &out // Internal Stream
12 ) {
13 #pragma HLS interface ap_ctrl_chain port=return
14 bool eos = false;
15 vdatamover:
16 do {
17 // Reading a and b streaming into packets
18 pkt t1 = in.read();
19
20 // Packet for output
21 pkt t_out;
22
23 // Reading data from input packet
24 ap_uint<DWIDTH> in1 = t1.data;
25
26 // Vadd operation
27 ap_uint<DWIDTH> tmpOut = in1;
28
29 // Setting data and configuration to output packet
30 t_out.data = tmpOut;
31 t_out.last = t1.last;
32 t_out.keep = -1; // Enabling all bytes
33
34 // Writing packet to output stream
35 out.write(t_out);
36
37 if (t1.last) {
38 eos = true;
39 }
40 } while (eos == false);
使用 XRT 本机 API 时,主机应用需要使用 xrt::autostart API 将内核设置为自动重启模式,如下所示。在主机应用中创建 XRT 运行对象,并将其设置为 autostart 模式。此处指定 xrt::autostart{0},表示重新启动内核的迭代次数无限制。
由于内核包含串流接口,因此内核将不会自行停止,原因是串流接口没有启动和停止机制。在此情况下,主机应用可使用 abort() 方法显式停止内核。
// Host-side flow: run the add and mult kernels `iter` times while the
// data-mover kernel runs in auto-restart mode, verify every result, and stop
// the data mover explicitly when done.
// Relies on `device`, `uuid`, `data_size`, `data_size_bytes`, `iter`,
// `adder1` and `adder2` being defined earlier in the enclosing function.

// add(in1, in2, nullptr, data_size)
xrt::kernel add(device, uuid, "krnl_stream_vadd");
xrt::bo in1(device, data_size_bytes, add.group_id(0));
auto in1_data = in1.map<int*>();
xrt::bo in2(device, data_size_bytes, add.group_id(1));
auto in2_data = in2.map<int*>();

// mult(in3, nullptr, out, data_size)
xrt::kernel mult(device, uuid, "krnl_stream_vmult");
xrt::bo in3(device, data_size_bytes, mult.group_id(0));
auto in3_data = in3.map<int*>();
xrt::bo out(device, data_size_bytes, mult.group_id(2));
auto out_data = out.map<int*>();

xrt::kernel incr(device, uuid, "krnl_stream_vdatamover");

// create run objects for re-use in loop
xrt::run add_run(add);
xrt::run mult_run(mult);

std::cout <<"performing auto-restart mode with infinite auto restart"<<std::endl;
// xrt::autostart{0} requests an unlimited number of restarts.
// NOTE(review): four nullptr arguments are passed here, while the
// krnl_stream_vdatamover kernel shown above declares two stream ports --
// confirm the argument count against the built kernel.
auto incr_run = incr(xrt::autostart{0}, nullptr, nullptr, nullptr, nullptr);

// computed expected result
std::vector<int> sw_out_data(data_size);
std::cout << " for loop started" <<std::endl;

for (unsigned int cnt = 0; cnt < iter; ++cnt) {
    // Create the test data and software result
    for (size_t i = 0; i < data_size; ++i) {
        in1_data[i] = static_cast<int>(i);
        in2_data[i] = 2 * static_cast<int>(i);
        in3_data[i] = static_cast<int>(i);
        out_data[i] = 0;
        sw_out_data[i] = (in1_data[i] + in2_data[i] + adder1 + adder2) * in3_data[i];
    }

    // sync test data to kernel
    in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
    in2.sync(XCL_BO_SYNC_BO_TO_DEVICE);
    in3.sync(XCL_BO_SYNC_BO_TO_DEVICE);

    // start the pipeline
    add_run(in1, in2, nullptr, data_size);
    mult_run(in3, nullptr, out, data_size);

    // wait for the pipeline to finish
    add_run.wait();
    mult_run.wait();

    // sync result from device to host
    out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

    // compare with expected scalar adders
    for (size_t i = 0; i < data_size; i++) {
        if (out_data[i] != sw_out_data[i]) {
            // Report the adder values actually used for this iteration;
            // nothing in this loop modifies them.
            std::cout << "error in iteration = " << cnt
                      << " expected output = " << sw_out_data[i]
                      << " observed output = " << out_data[i]
                      << " adder1 = " << adder1
                      << " adder2 = " << adder2 << '\n';
            // The auto-restart kernel never stops by itself, so stop it
            // before leaving through the exception as well.
            incr_run.abort();
            throw std::runtime_error("result mismatch");
        }
    }
}

// auto restart kernel would require the host to stop it explicitly
incr_run.abort();
}