@@ -124,3 +124,60 @@ def two_resizes_with_shared_subgraphs(x: ost.FLOAT["batch", 1, "height", "width"
     return opset11.Add(resized_y, resized_z)
 
 make_model_and_data(two_resizes_with_shared_subgraphs, np.random.rand(1, 1, 4, 5).astype(np.float32), np.random.rand(1, 1, 3, 2).astype(np.float32), np.random.rand(1, 1, 2, 1).astype(np.float32))
+
+batch_size = 1
+sequence_length = 320
+input_hidden_size = 48
+qk_hidden_size = 48
+v_hidden_size = 48
+num_heads = 4
+qk_head_size = qk_hidden_size // num_heads
+v_head_size = v_hidden_size // num_heads
+attention_weight = np.random.rand(input_hidden_size, qk_hidden_size + qk_hidden_size + v_hidden_size).astype(np.float32)
+attention_bias = np.random.rand(qk_hidden_size + qk_hidden_size + v_hidden_size).astype(np.float32)
+
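+# Decomposed multi-head attention subgraph: one fused QKV projection, per-head
+# slice/reshape/transpose, scaled dot-product attention with softmax, and a final
+# merge of the heads back into [batch_size, sequence_length, v_hidden_size].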
+@ost.script()
+def attention(x: ost.FLOAT[batch_size, sequence_length, input_hidden_size]) -> ost.FLOAT[batch_size, sequence_length, input_hidden_size]:
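+    # [batch, seq, hidden] -> [seq, batch, hidden], then a single MatMul + Add
+    # produces Q, K and V concatenated along the last axis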
+    transpose = op.Transpose(x, perm=[1, 0, 2])
+    qkv_matmul_weight = op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.FLOAT, attention_weight.shape, attention_weight))
+    qkv_matmul = op.MatMul(transpose, qkv_matmul_weight)
+
+    qkv_add_bias = op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.FLOAT, attention_bias.shape, attention_bias))
+    qkv_add = op.Add(qkv_add_bias, qkv_matmul)
+
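+    # each path slices its columns from qkv_add and reshapes to
+    # [sequence_length, batch_size * num_heads, head_size];
+    # q is additionally scaled by 1 / sqrt(qk_hidden_size)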
+    # q path
+    q_path_slice = op.Slice(qkv_add,
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([0], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([qk_hidden_size], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([-1], dtype=np.int64))))
+    q_path_reshape = op.Reshape(q_path_slice, op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [3], np.array([sequence_length, batch_size * num_heads, qk_head_size], dtype=np.int64))), allowzero=0)
+    q_path_transpose = op.Transpose(q_path_reshape, perm=[1, 0, 2])
+    q_path_div = op.Div(q_path_transpose, op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.FLOAT, [], np.array([np.sqrt(qk_hidden_size)], dtype=np.float32))))
+    # k path
+    k_path_slice = op.Slice(qkv_add,
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([qk_hidden_size], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([qk_hidden_size + qk_hidden_size], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([-1], dtype=np.int64))))
+    k_path_reshape = op.Reshape(k_path_slice, op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [3], np.array([sequence_length, batch_size * num_heads, qk_head_size], dtype=np.int64))), allowzero=0)
+    k_path_transpose = op.Transpose(k_path_reshape, perm=[1, 2, 0])
+
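+    # the q·k scores have shape [batch_size * num_heads, sequence_length, sequence_length]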
+    # qk path
+    qk_matmul = op.MatMul(q_path_div, k_path_transpose)
+    qk_softmax = op.Softmax(qk_matmul)
+
+    # v path
+    v_path_slice = op.Slice(qkv_add,
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([qk_hidden_size + qk_hidden_size], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([qk_hidden_size + qk_hidden_size + v_hidden_size], dtype=np.int64))),
+                            op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [1], np.array([-1], dtype=np.int64))))
+    v_path_reshape = op.Reshape(v_path_slice, op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [3], np.array([sequence_length, batch_size * num_heads, v_head_size], dtype=np.int64))), allowzero=0)
+    v_path_transpose = op.Transpose(v_path_reshape, perm=[1, 0, 2])
+
+    # apply the attention weights to v, transpose back and merge the heads
+    matmul = op.MatMul(qk_softmax, v_path_transpose)
+    trans = op.Transpose(matmul, perm=[1, 0, 2])
+    reshape = op.Reshape(trans, op.Constant(value=onnx.helper.make_tensor("", onnx.TensorProto.INT64, [3], np.array([batch_size, sequence_length, v_hidden_size], dtype=np.int64))))
+
+    return reshape
+
+make_model_and_data(attention, np.random.rand(batch_size, sequence_length, input_hidden_size).astype(np.float32))