CNN 中的典型运算类型 | 参数 | DPUCZDX8G_ISA1_B4096 3 (ZCU102 和 ZCU104) | DPUCAHX8L_ISA0(U50、U50LV 和 U280) | DPUCVDX8G_ISA3_C32B3 4 (VCK190) | DPUCAHX8H_ISA2_DWC 1 (U50、U55C、U50LV 和 U280) | DPUCADF8H_ISA0(U200 和 U250) | DPUCVDX8H_ISA1_F2W4_4PE 2 (VCK5000) | DPUCV2DX8G_ISA1_C20B1 5 (VEK280/V70) |
---|---|---|---|---|---|---|---|---|
内部参数 | channel_parallel:16 bank_depth:2048 bank_num:8 |
channel_parallel:32 bank_depth:4096 |
channel_parallel:16 bank_depth:8192 bank_num:8 |
channel_parallel:16 bank_depth:2048 |
channel_parallel:16 bank_depth:8192 |
channel_parallel:64 bank_depth:2048 |
channel_parallel:32 bank_depth:65528 bank_num:1 |
|
conv2d | Kernel size | w, h: [1, 16] | w, h: [1, 16] | w, h: [1, 16] w * h * ceil(input_channel/2048) <= 64 |
w, h: [1, 16] | w, h: [1, 16] | w, h: [1, 16] | w, h: [1, 16] 256 * h * w <= 13760 |
Strides | w, h: [1, 8] | w, h: [1, 4] | w, h: [1, 8] | w, h: [1, 4] | w, h: [1, 8] | w, h: [1, 4] | w, h: [1, 8] | |
Dilation | dilation * input_channel <= 256 * channel_parallel | |||||||
Paddings | pad_left, pad_right: [0, (kernel_w - 1) * dilation_w] | |||||||
pad_top, pad_bottom: [0, (kernel_h - 1) * dilation_h] | ||||||||
In Size | kernel_w * kernel_h * ceil(input_channel / channel_parallel) <= bank_depth | kernel_w * kernel_h * ceil(input_channel / channel_parallel) * ceil(channel_parallel / 4) + 4 <= bank_depth | ||||||
input_channel <= 256 * channel_parallel | input_channel <= 256 * channel_parallel | input_channel <= 256 * channel_parallel | ||||||
Out Size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU、LeakyReLU 和 ReLU6 | ReLU 和 LeakyReLU | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | |
Group* (Caffe) | group==1 | |||||||
depthwise-conv2d | Kernel size | w, h: [1, 256] | w, h: [3] | w, h: [1, 256] | w, h: {1, 2, 3, 5, 7} | 不支持 | w, h: [1, 8] | w, h: [1, 256] h * w <= 431 |
Strides | w, h: [1, 256] | w, h: [1, 2] | w, h: [1, 256] | w, h: [1, 4] | w, h: [1, 4] | w, h: [1, 256] | ||
dilation | dilation * input_channel <= 256 * channel_parallel | dilation * input_channel <= 256 * channel_parallel | ||||||
Paddings | pad_left, pad_right: [0, min((kernel_w - 1), 15) * dilation_w] | pad_left, pad_right: [0, (kernel_w - 1) * dilation_w] | pad_left, pad_right: [0, min((kernel_w-1), 15) * dilation_w] | pad_left, pad_right: [0, (kernel_w - 1) * dilation_w] | pad_left, pad_right: [0, (kernel_w - 1) * dilation_w] | pad_left, pad_right: [0, min((kernel_w-1), 15) * dilation_w] | ||
pad_top, pad_bottom: [0, min((kernel_h - 1), 15) * dilation_h] | pad_top, pad_bottom: [0, (kernel_h - 1) * dilation_h] | pad_top, pad_bottom: [0, min((kernel_h-1), 15) * dilation_h] | pad_top, pad_bottom: [0, (kernel_h - 1) * dilation_h] | pad_top, pad_bottom: [0, (kernel_h - 1) * dilation_h] | pad_top, pad_bottom: [0, min((kernel_h-1), 15) * dilation_h] | |||
In Size | kernel_w * kernel_h * ceil(input_channel / channel_parallel) <= bank_depth | kernel_w * kernel_h * ceil(input_channel / channel_parallel) <= bank_depth | (6 * stride_w + kernel_w) * kernel_h + 4 <= 512 | |||||
Out Size | output_channel <= 256 * channel_parallel | output_channel <= 256 * channel_parallel | ||||||
Activation | ReLU、ReLU6、LeakyReLU 6、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU、ReLU6、LeakyReLU 7、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU 和 ReLU6 | ReLU、ReLU6、LeakyReLU、Hard-Swish 和 Hard-Sigmoid | ||
Group* (Caffe) | group==input_channel | group==input_channel | ||||||
transposed-conv2d | Kernel size | kernel_w/stride_w, kernel_h/stride_h: [1, 16] | ||||||
Strides | ||||||||
Paddings | pad_left, pad_right: [0, kernel_w-1] | |||||||
pad_top, pad_bottom: [0, kernel_h-1] | ||||||||
Out Size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU、LeakyReLU 和 ReLU6 | ReLU 和 LeakyReLU | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | |
depthwise-transposed-conv2d | Kernel size | kernel_w/stride_w, kernel_h/stride_h: [1, 256] | kernel_w/stride_w, kernel_h/stride_h: [3] | kernel_w/stride_w, kernel_h/stride_h: [1, 256] | kernel_w/stride_w, kernel_h/stride_h: {1,2, 3, 5, 7} | 不支持 | kernel_w/stride_w, kernel_h/stride_h: [1, 8] | kernel_w/stride_w, kernel_h/stride_h: [1, 256] |
Strides | ||||||||
Paddings | pad_left, pad_right: [0, min((kernel_w-1), 15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1),15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1),15)] | ||
pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | |||
Out Size | output_channel <= 256 * channel_parallel | output_channel <= 256 * channel_parallel | ||||||
Activation | ReLU、ReLU6、LeakyReLU 6、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU、ReLU6、LeakyReLU 7、Hard-Swish 和 Hard-Sigmoid | ReLU 和 ReLU6 | ReLU 和 ReLU6 | ReLU、ReLU6、LeakyReLU、Hard-Swish 和 Hard-Sigmoid | ||
max-pooling | Kernel size | w, h: [1, 256] ceil(h/bank_num) * w <= bank_depth |
w, h: {2, 3, 5, 7, 8} | w, h: [1, 256] ceil(h/bank_num) * w <= bank_depth |
w, h: [1, 8] | w, h: [1, 16] | w, h: [1, 128] | w, h: [1, 256] h * w <= bank_depth |
Strides | w, h: [1, 256] | w, h: [1, 8] | w, h: [1, 256] | w, h: [1, 8] | w, h: [1, 8] | w, h: [1, 128] | w, h: [1, 256] | |
Paddings | pad_left, pad_right: [0, min((kernel_w-1), 15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1), 15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1), 15)] | |||
pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | ||||
Activation | ReLU 和 ReLU6 | 不支持 | ReLU 和 ReLU6 | 不支持 | ReLU | 不支持 | ReLU 和 ReLU6 | |
average-pooling | Kernel size | w, h: [1, 256] ceil(h/bank_num) * w <= bank_depth |
w, h: {2, 3, 5, 7, 8} w==h |
w, h: [1, 256] ceil(h/bank_num) * w <= bank_depth |
w, h: [1, 8] w==h |
w, h: [1, 16] | w, h: [1, 128] w==h |
w, h: [1, 256] h * w <= bank_depth |
Strides | w, h: [1, 256] | w, h: [1, 8] | w, h: [1, 256] | w, h: [1, 8] | w, h: [1, 8] | w, h: [1, 128] | w, h: [1, 256] | |
Paddings | pad_left, pad_right: [0, min((kernel_w-1), 15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1), 15)] | pad_left, pad_right: [1, kernel_w-1] | pad_left, pad_right: [0, min((kernel_w-1), 15)] | |||
pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | pad_top, pad_bottom: [1, kernel_h-1] | pad_top, pad_bottom: [0, min((kernel_h-1), 15)] | ||||
Activation | ReLU 和 ReLU6 | 不支持 | ReLU 和 ReLU6 | 不支持 | ReLU | 不支持 | ReLU 和 ReLU6 | |
eltwise | type | sum, prod | sum | sum, prod | sum | sum | sum, prod | 2-input sum, prod |
Input Channel | input_channel <= 256 * channel_parallel | |||||||
Activation | ReLU | ReLU | ReLU | ReLU | ReLU | ReLU 和 Hard-Sigmoid | ReLU | |
concat | 特定于网络的限制,与特征图的大小、量化结果和编译器优化有关。 | |||||||
reorg | Strides | reverse==false : stride ^ 2 * input_channel <= 256 * channel_parallel reverse==true : input_channel <= 256 * channel_parallel |
||||||
pad | In Size | input_channel <= 256 * channel_parallel | ||||||
Mode | "SYMMETRIC"(在编译器最优化过程中,"CONSTANT" pad(value=0) 会被融合到相邻运算符中) | "SYMMETRIC", "CONSTANT"(所有填充值都相同) | "SYMMETRIC"(在编译器最优化过程中,"CONSTANT" pad(value=0) 会被融合到相邻运算符中) | |||||
global pooling | 全局池化将作为常规池化来处理,内核大小等于输入张量大小。 | |||||||
InnerProduct、Fully Connected 和 Matmul | 这些运算将会变换为 conv2d 运算 | |||||||
resize | scale | NEAREST:ceil(scale/bank_num) * scale * ceil(input_channel/channel_parallel) <= bank_depth BILINEAR:仅适用于 4-D 特征映射。这将变换为 pad 和 depthwise-transposed-conv2d。 TRILINEAR:仅适用于 5-D 特征映射。这将变换为 pad 和 transposed-conv3d。 |
||||||
mode | NEAREST 和 BILINEAR | NEAREST 和 BILINEAR | NEAREST、BILINEAR 和 TRILINEAR | NEAREST 和 BILINEAR | NEAREST 和 BILINEAR | NEAREST 和 BILINEAR | NEAREST 和 BILINEAR | |
conv3d | kernel size | 不支持 | 不支持 | w, h, d: [1, 16] w * h * ceil(ceil(input_channel/16) * 16 * d / 2048) <= 64 |
不支持 | 不支持 | 不支持 | 不支持 |
strides | w, h, d: [1, 8] | |||||||
paddings | pad_left, pad_right: [0, kernel_w-1] pad_top, pad_bottom: [0, kernel_h-1] pad_front, pad_back: [0, kernel_d-1] |
|||||||
In size | kernel_w * kernel_h * kernel_d * ceil(input_channel/channel_parallel) <= bank_depth, input_channel <= 256 * channel_parallel |
|||||||
Out size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | |||||||
depthwise-conv3d | kernel size | 不支持 | 不支持 | w, h: [1, 256] d: [1, 16] |
不支持 | 不支持 | 不支持 | 不支持 |
strides | w, h: [1, 256] d=1 |
|||||||
paddings | pad_left, pad_right: [0, min((kernel_w-1), 15)] pad_top, pad_bottom: [0, min((kernel_h-1), 15)] pad_front, pad_back: [0, min((kernel_d-1), 15)] |
|||||||
In size | kernel_w * kernel_h * kernel_d * ceil(input_channel/channel_parallel) <= bank_depth | |||||||
Out size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU 和 ReLU6 | |||||||
transposed-conv3d | kernel size | 不支持 | 不支持 | kernel_w/stride_w, kernel_h/stride_h, kernel_d/stride_d: [1, 16] | 不支持 | 不支持 | 不支持 | 不支持 |
strides | ||||||||
paddings | pad_left, pad_right: [0, kernel_w-1] pad_top, pad_bottom: [0, kernel_h-1] pad_front, pad_back: [0, kernel_d-1] |
|||||||
Out size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU、LeakyReLU、ReLU6、Hard-Swish 和 Hard-Sigmoid | |||||||
depthwise-transposed-conv3d | kernel size | 不支持 | 不支持 | kernel_w/stride_w, kernel_h/stride_h, kernel_d/stride_d: [1, 16] | 不支持 | 不支持 | 不支持 | 不支持 |
strides | ||||||||
paddings | pad_left, pad_right: [0, min((kernel_w-1), 15)] pad_top, pad_bottom: [0, min((kernel_h-1), 15)] pad_front, pad_back: [0, min((kernel_d-1), 15)] |
|||||||
Out size | output_channel <= 256 * channel_parallel | |||||||
Activation | ReLU 和 ReLU6 | |||||||
Strided_slice | Stride |
Stride_batch = 1 Stride_channel = 1 |
||||||
correlation1d_elemwise | input size |
input_channel <= 256 * channel_parallel |
不支持 |
input_channel <= 256 * channel_parallel |
不支持 | 不支持 | 不支持 | 不支持 |
correlation2d_elemwise | input size |
input_channel <= 256 * channel_parallel |
不支持 |
input_channel <= 256 * channel_parallel |
不支持 | 不支持 | 不支持 | 不支持 |
argmax | axis | axis = input_channel | 不支持 | axis = input_channel | 不支持 | 不支持 | 不支持 | axis = input_channel |
input size | input_channel <= 128 | input_channel <= 128 | input_channel <= 128 |
reduction max | axis | axis = input_channel | 不支持 | axis = input_channel | 不支持 | 不支持 | 不支持 | axis = input_channel |
input size | input_channel < 2 ^ 12 | input_channel < 2 ^ 12 | input_channel < 2 ^ 12 | |||||
cost_volume | input size |
input_channel <= 256 * channel_parallel |
不支持 |
input_channel <= 256 * channel_parallel |
不支持 | 不支持 | 不支持 | 不支持 |
transpose | ||||||||
|