Scenario 2: Kernel Using Auto-Restart and Mailbox - 2021.2 English

Vitis Unified Software Platform Documentation: Application Acceleration Development (UG1393)

Document ID
UG1393
Release Date
2022-03-29
Version
2021.2 English

The mailbox feature provides the ability to have semi-synchronization with the host. The mailbox is a non-blocking mechanism that updates the kernel parameters. Any updates provided through the mailbox will be picked up the next time the kernel starts (from a kernel perspective).

This example kernel uses scalar values that are programmed from the host application; the kernel picks them up at the next kernel call. The scalars, adder1 and adder2, are updated asynchronously from the host. Set the kernel in auto-restarting mode and enable the mailbox feature using the following Vitis HLS commands:
config_interface -s_axilite_mailbox both
config_interface -s_axilite_auto_restart_counter 1
config_interface -s_axilite_sw_reset

The example code is as follows:

#define DWIDTH 32
11 
12 typedef ap_axiu<DWIDTH, 0, 0, 0> pkt;
13 
14 extern "C" {
15 void krnl_stream_vdatamover(hls::stream<pkt> &in,
16     ┆   ┆   ┆   ┆   ┆ hls::stream<pkt> &out,
17     ┆   ┆   ┆   ┆   ┆ int adder1,
18     ┆   ┆   ┆   ┆   ┆ int adder2
19     ┆   ┆   ┆   ┆   ┆ ) {
20
21 #pragma HLS interface ap_ctrl_chain port=return
22 #pragma HLS INTERFACE s_axilite port=adder2
25 #pramga  HLS port=adder1 stable
   #pramga  HLS port=adder2 stable
27 bool eos = false;
28 vdatamover:
29   do {
30     // Reading a and b streaming into packets
31     pkt t1 = in.read();
32 
33     // Packet for output
34     pkt t_out;
35 
36     // Reading data from input packet
37     ap_uint<DWIDTH> in1 = t1.data;
38 
39     // Vadd operation
40     ap_uint<DWIDTH> tmpOut = in1+adder1+adder2;
41 
42     // Setting data and configuration to output packet
43     t_out.data = tmpOut;
44     t_out.last = t1.last;
45     t_out.keep = -1; // Enabling all bytes
46 
47     // Writing packet to output stream
48     out.write(t_out);
49 
50     if (t1.last) {
51     ┆ eos = true;
52     }
53   } while (eos == false);
54

Create a mailbox to update the scalar values adder1 and adder2.

Update the kernel parameters using the set_arg and write methods as shown below. The auto-restarting kernel will not stop itself because there is no start and stop for a streaming interface. It must be explicitly stopped or reset. The host code can explicitly stop the kernel from running using the abort() method.

// Host-side flow (fragment; the enclosing function, `device`, `uuid`,
// `data_size`, `data_size_bytes` and `iter` are defined outside this snippet).
// Sets up the add and mult kernels plus the auto-restarting incr kernel, then
// runs `iter` iterations, updating incr's scalars through a mailbox each time.
//
// add(in1, in2, nullptr, data_size)
  xrt::kernel add(device, uuid, "krnl_stream_vadd");
  // Input buffers for add, allocated with the group ids of its arguments 0
  // and 1 and mapped so the host can fill them directly.
  xrt::bo in1(device, data_size_bytes, add.group_id(0));
  auto in1_data = in1.map<int*>();
  xrt::bo in2(device, data_size_bytes, add.group_id(1));
  auto in2_data = in2.map<int*>();
 
  // mult(in3, nullptr, out, data_size)
  xrt::kernel mult(device, uuid, "krnl_stream_vmult");
  // Input buffer (argument 0) and result buffer (argument 2) for mult.
  xrt::bo in3(device, data_size_bytes, mult.group_id(0));
  auto in3_data = in3.map<int*>();
  xrt::bo out(device, data_size_bytes, mult.group_id(2));
  auto out_data = out.map<int*>();
 
 
  // Handle for the auto-restarting data mover and the initial values of its
  // two scalar arguments.
  xrt::kernel incr(device, uuid, "krnl_stream_vdatamover");
  int adder1 = 20;  // arbitrarily chosen to be different from 0
  int adder2 = 10;  // arbitrarily chosen to be different from 0
 
  // create run objects for re-use in loop
  xrt::run add_run(add);
  xrt::run mult_run(mult);
  std::cout <<"performing never-ending mode with infinite auto restart"<<std::endl;
  // Start incr with xrt::autostart{0}; the two stream arguments are passed as
  // nullptr, followed by the initial scalar values (argument indices 2 and 3).
  auto incr_run = incr(xrt::autostart{0}, nullptr, nullptr, adder1, adder2);

// create mailbox to programmatically update the incr scalar adders
  xrt::mailbox incr_mbox(incr_run);
 
  // host-side buffer holding the expected (software-computed) result
  std::vector<int> sw_out_data(data_size);
 
  std::cout << " for loop started" <<std::endl;
  // NOTE(review): `error` is never read in this snippet; a mismatch throws
  // instead of setting it.
  bool error = false;   // indicates error in any of the iterations
  for (unsigned int cnt = 0; cnt < iter; ++cnt) {
 
    
    // Create the test data and software result
    for(size_t i = 0; i < data_size; ++i) {
      in1_data[i] = static_cast<int>(i);
      in2_data[i] = 2 * static_cast<int>(i);
      in3_data[i] = static_cast<int>(i);
      out_data[i] = 0;
      // expected value: (in1 + in2 + adder1 + adder2) * in3, using the adder
      // values current for this iteration
      sw_out_data[i] = (in1_data[i] + in2_data[i] + adder1 + adder2) * in3_data[i];
    }
 
    // sync test data to kernel
    in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
    in2.sync(XCL_BO_SYNC_BO_TO_DEVICE);
    in3.sync(XCL_BO_SYNC_BO_TO_DEVICE);
 
    // start the pipeline
    add_run(in1, in2, nullptr, data_size);
    mult_run(in3, nullptr, out, data_size);
 
    // wait for the pipeline to finish
    add_run.wait();
    mult_run.wait();
 
    // prepare for next iteration: stage the next adder values in the mailbox.
    // Indices 2 and 3 match the positions of adder1 and adder2 in the incr
    // argument list. The host-side copies are updated here too, so the next
    // iteration's expected result uses the new values.
    incr_mbox.set_arg(2, ++adder1); // update the mailbox
    incr_mbox.set_arg(3, --adder2); // update the mailbox
 
 
    // write the mailbox content to hw, the write will not be picked
    // up until the next iteration of the pipeline (incr).
    incr_mbox.write();  // requests sync of mailbox to hw
 
 
    // sync result from device to host
    out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
 
    // compare the device output against the software-computed expectation
    for (size_t i = 0 ; i < data_size; i++) {
      if (out_data[i] != sw_out_data[i]) {
        // adder1/adder2 were already advanced above for the next iteration,
        // so undo that step to report the values used in this iteration.
        std::cout << "error in iteration = " << cnt
                  << " expected output = " << sw_out_data[i]
                  << " observed output = " << out_data[i]
                  << " adder1 = " << adder1 - 1
                  << " adder2 = " << adder2 + 1 << '\n';
        throw std::runtime_error("result mismatch");
      }
    }
  }
        // explicitly stop the auto-restarting incr kernel
        incr_run.abort();
}