C++ XRT API for Multi-Process Support - 2021.2 English

Versal ACAP AI Engine Programming Environment User Guide (UG1076)

Document ID
UG1076
ft:locale
English (United States)
Release Date
2021-12-17
Version
2021.2 English

The XRT C++ API extends the xrt::aie::device class to support access modes. The class is declared in https://github.com/Xilinx/XRT/blob/master/src/runtime_src/core/include/xrt/xrt_aie.h, as shown in the following code.

namespace xrt { namespace aie {

/**
* @enum access_mode - AIE array access mode
*
* @var exclusive
* Exclusive access to AIE array. No other process will have
* access to the AIE array.
* @var primary
* Primary access to AIE array provides same capabilities as exclusive
* access, but other processes will be allowed shared access as well.
* @var shared
* Shared none destructive access to AIE array, a limited number of APIs
* can be called.
* @var none
* For internal use only, to be removed.
*
* By default the AIE array is opened in primary access mode.
*/
enum class access_mode : uint8_t { exclusive = 0, primary = 1, shared = 2, none = 3 };

class device : public xrt::device
{
public:
  using access_mode = xrt::aie::access_mode;

  /**
  * device() - Construct device with specified access mode
  *
  * @param args
  * Arguments to construct a device (xrt_device.h).
  * @param am
  * Open the AIE device is specified access mode (default primary)
  *
  * The default access mode is primary.
  */
  template 
  device(ArgType&& arg, access_mode am = access_mode::primary)
    : xrt::device(std::forward(arg))
  {
    open_context(am);
  }
...
};

}} // namespace aie, xrt

The XRT C++ API extends the xrt::graph class to support access modes. The class is declared in https://github.com/Xilinx/XRT/blob/master/src/runtime_src/core/include/xrt/xrt_graph.h, as shown in the following code.

namespace xrt {

class graph
{
public:
  /**
   * @enum access_mode - graph access mode
   *
   * @var exclusive
   *  Exclusive access to graph and all graph APIs. No other process
   *  will have access to the graph.
   * @var primary
   *  Primary access to graph provides same capabilities as exclusive
   *  access, but other processes will be allowed shared access as well.
   * @var shared
   *  Shared non-destructive access to graph, a limited number of APIs
   *  can be called.
   *
   * By default a graph is opened in primary access mode.
   */
  enum class access_mode : uint8_t { exclusive = 0, primary = 1, shared = 2 };

  /**
   * graph() - Constructor from a device, xclbin and graph name
   *
   * @param device
   *  Device on which the graph should execute
   * @param xclbin_id
   *  UUID of the xclbin with the graph
   * @param name
   *  Name of graph to construct
   * @param am
   *  Open the graph with the specified access mode (default primary)
   */
  graph(const xrt::device& device, const xrt::uuid& xclbin_id, const std::string& name,
    access_mode am = access_mode::primary);

...
};

} // namespace xrt

The corresponding C++ version of the sample code is as follows.

#include <stdlib.h>
#include <fstream>
#include <iostream>
#include <unistd.h>
#include <sys/wait.h>
#include "adf/adf_api/XRTConfig.h"
#include "experimental/xrt_aie.h"
#include "experimental/xrt_graph.h"
#include "experimental/xrt_kernel.h"

#include "graph.cpp"

// Number of int samples the host reads back from s2mm; main() allocates
// OUTPUT_SIZE * sizeof(int) bytes for the output buffer.
// 8192 matches 32 iterations of graph::run (run() issues two runs of 16).
#define OUTPUT_SIZE 8192
// RTP values written with graph.update() before the first graph run; main()
// compares the first half of the output against these.
int value1[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
// RTP values written before the second graph run; main() compares the second
// half of the output against these.
int value2[16] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};

using namespace adf;

/**
 * run() - Control graph gr[id] from a forked child process
 *
 * Forks one more process that opens the AIE device and the graph in shared
 * access mode and reads back the RTP port gr[id].k.inout[0]. The calling
 * process opens the device and graph in the default (primary) access mode,
 * updates RTP port gr[id].k.in[1] with value1, runs 16 iterations, waits,
 * updates the port with value2 and runs 16 more iterations.
 *
 * @param argc
 *  Argument count of the program; must be 2.
 * @param argv
 *  Program arguments; argv[1] is the xclbin file name.
 * @param id
 *  Index of the graph (gr[id]) this process controls.
 * @return
 *  0 on success, EXIT_FAILURE on wrong usage or when fork() fails.
 */
int run(int argc, char* argv[],int id){
	std::cout<<"Child process "<<id<<" start"<<std::endl;

	//TARGET_DEVICE macro needs to be passed from gcc command line
	if(argc != 2) {
		std::cout << "Usage: " << argv[0] <<" <xclbin>" << std::endl;
		return EXIT_FAILURE;
	}
	const char* xclbinFilename = argv[1];
	const std::string graph_name=std::string("gr[")+std::to_string(id)+"]";
	const std::string rtp_inout_name=graph_name+".k.inout[0]";
	const std::string rtp_in_name=graph_name+".k.in[1]";

	const pid_t reader = fork();
	if(reader < 0){
		// Without the reader process the sample cannot demonstrate shared access.
		std::cout<<"ERROR: fork failed in child process "<<id<<std::endl;
		return EXIT_FAILURE;
	}
	if(reader==0){//child child process
		{
			// Inner scope: exit() does not unwind the stack, so the XRT objects
			// must go out of scope (and run their destructors) before it is called.
			int value_readback[16]={0};
			xrt::aie::device device{0, xrt::aie::device::access_mode::shared};
			auto uuid = device.load_xclbin(xclbinFilename);
			xrt::graph graph{device, uuid, graph_name, xrt::graph::access_mode::shared};

			graph.read(rtp_inout_name, value_readback);
			std::cout<<"Add value read back are:";
			for(int value : value_readback){
				std::cout<<value<<",\t";
			}
			std::cout<<std::endl;
		}
		std::cout<<"child child process exit"<<std::endl;
		// exit() rather than return: returning would continue in main()'s child branch.
		exit(0);
	}

	xrt::aie::device device{0};   // default primary context
	auto uuid = device.load_xclbin(xclbinFilename);
	xrt::graph graph{device, uuid, graph_name}; // default primary context

	//first run
	graph.update(rtp_in_name, value1);
	graph.run(16); // 16 iterations

	graph.wait(0); // wait 0 => wait till graph is done
	std::cout<<"Graph wait done"<<std::endl;

	//second run
	graph.update(rtp_in_name, value2);
	graph.run(16); // 16 iterations

	while(wait(nullptr)>0){//Wait for child child process
	}

	graph.wait(0); // wait 0 => wait till graph is done
	std::cout<<"Child process:"<<id<<" done"<<std::endl;
	return 0;
}

/**
 * Check rows [first_row, last_row) of the s2mm output, 16 samples per row.
 *
 * Sample number idx (= row*16 + col) is expected to equal idx + rtp[col].
 * The expected value is derived from the index, so one bad sample does not
 * shift the expectation for the samples that follow it. At most one error
 * line is printed per row.
 *
 * @return true when every checked sample matches.
 */
static bool check_rows(const int* out, int first_row, int last_row, const int (&rtp)[16])
{
	bool ok = true;
	for(int row=first_row;row<last_row;row++){
		for(int col=0;col<16;col++){
			const int idx = row*16+col;
			if(out[idx]!=idx+rtp[col]){
				std::cout<<"ERROR: num="<<idx<<" out="<<out[idx]<<std::endl;
				ok = false;
				break; // report only the first mismatch of this row
			}
		}
	}
	return ok;
}

/**
 * main() - Host entry point of the multi-process sample
 *
 * For every graph index i in [0, GRAPH_NUM) a child process is forked that
 * controls graph gr[i] through run(). The parent starts the s2mm_<i+1> and
 * data_generator_<i+1> kernels, waits for s2mm, and verifies the output:
 * first half against value1, second half against value2.
 *
 * NOTE(review): the parent now continues with the next graph index instead of
 * returning after the first one, so the xclbin is loaded once per index.
 * This was not exercised on hardware with GRAPH_NUM > 1 — please confirm.
 *
 * @return EXIT_SUCCESS when all graphs verify and all children exit
 *  successfully, otherwise a non-zero value.
 */
int main(int argc, char* argv[])
{
	//TARGET_DEVICE macro needs to be passed from gcc command line
	// Validate the command line before any process is forked.
	if(argc != 2) {
		std::cout << "Usage: " << argv[0] <<" <xclbin>" << std::endl;
		return EXIT_FAILURE;
	}
	const char* xclbinFilename = argv[1];
	const size_t output_size_in_bytes = OUTPUT_SIZE * sizeof(int);

	try {
		bool failed = false;
		for(int i=0;i<GRAPH_NUM;i++){
			const pid_t child = fork();
			if(child < 0){
				// No child means nobody runs the graph; s2mm would never complete.
				std::cout << "ERROR: fork failed for graph " << i << std::endl;
				return EXIT_FAILURE;
			}
			if(child==0){//child
				auto match = run(argc, argv,i);
				std::cout << "TEST child " <<i<< (match ? " FAILED" : " PASSED") << "\n";
				return (match ? EXIT_FAILURE :  EXIT_SUCCESS);
			}

			// Parent. The device is opened only after fork() and all XRT objects
			// live in this loop iteration, so they are released before the next fork().
			// Open xclbin
			auto device = xrt::device(0); //device index=0
			auto uuid = device.load_xclbin(xclbinFilename);

			// s2mm & data_generator kernel handle
			const std::string s2mm_kernel_name=std::string("s2mm:{s2mm_")+std::to_string(i+1)+std::string("}");
			xrt::kernel s2mm = xrt::kernel(device, uuid, s2mm_kernel_name);
			const std::string data_generator_kernel_name=std::string("data_generator:{data_generator_")+std::to_string(i+1)+std::string("}");
			xrt::kernel data_generator = xrt::kernel(device, uuid, data_generator_kernel_name);

			// output memory
			auto out_bo=xrt::bo(device, output_size_in_bytes,s2mm.group_id(0));
			auto host_out=out_bo.map<int*>();
			auto s2mm_run = s2mm(out_bo, nullptr, OUTPUT_SIZE);//1st run for s2mm has started
			auto data_generator_run = data_generator(nullptr, OUTPUT_SIZE);

			// wait for s2mm done
			std::cout<<"Waiting s2mm to complete"<<std::endl;
			auto state = s2mm_run.wait();
			std::cout << "s2mm "<<" completed with status(" << state << ")"<<std::endl;
			out_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

			// First half was produced with value1, second half with value2.
			const int rows = OUTPUT_SIZE/16;
			const bool first_ok = check_rows(host_out, 0, rows/2, value1);
			const bool second_ok = check_rows(host_out, rows/2, rows, value2);
			const bool match = !(first_ok && second_ok); // non-zero means mismatch, as before

			std::cout << "TEST " <<i<< (match ? " FAILED" : " PASSED") << "\n";
			if(match){
				failed = true;
			}

			int status = 0;
			while(wait(&status)>0){//Wait for all child process
				// A child that crashed or returned non-zero fails the whole test.
				if(!WIFEXITED(status) || WEXITSTATUS(status)!=EXIT_SUCCESS){
					failed = true;
				}
			}
		}
		std::cout<<"all done"<<std::endl;
		return (failed ? EXIT_FAILURE :  EXIT_SUCCESS);
	}
	catch (std::exception const& e) {
		std::cout << "Exception: " << e.what() << "\n";
		std::cout << "FAILED TEST\n";
		return 1;
	}
}